首页 > 解决方案 > 如何从网页中提取特定文本

问题描述

我对寻找星座模式很感兴趣。我正在使用 'Sky map' android 应用程序进行视觉检查,现在我想构建一个应用程序来查找类似的星座结构。其中的一个子问题是找到特定天体的坐标。

示例:如何获取给定时间、日期和位置的“月球”坐标。

https://theskylive.com/planetarium 在其网页上以如下形式提供这些信息:

 Object: Moon [info|live][less]
 Right Asc: 04h 15m 12.5s **Decl: 17° 05' 46.3"** (J2000) [HMS|Dec]
 Magnitude: -10.54 Altitude: 56° Solar Elongation: 100.4° Constellation: Ari 
 Sun distance: 147.77 Million Km Earth distance: 0.38 Million Km
 Rise: 10:48 Transit: 18:40 Set: 01:35 **Europe/London**

对于月球,我们可以在该网页上找到坐标。是否有可用的 API?或者,我们该如何从网页中提取这些坐标信息?

标签: web-scraping

解决方案


我不是 Android 专家,但你可以这样做:

build.gradle

// Build script for the Planetarium sample: a plain Java project that pulls
// OkHttp (HTTP client) and org.json (JSON parsing) from Maven Central.
plugins {
    id 'java'
}

group 'test.test'
version '1.0-SNAPSHOT'

sourceCompatibility = 1.8

repositories {
    mavenCentral()
}

dependencies {
    // 'compile' / 'testCompile' are deprecated and were removed in Gradle 7;
    // use the same 'implementation' family of configurations throughout.
    testImplementation group: 'junit', name: 'junit', version: '4.12'
    implementation 'com.squareup.okhttp3:okhttp:3.13.1'
    implementation group: 'org.json', name: 'json', version: '20180813'
}

Planetarium.java

import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import org.json.JSONObject;

import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.util.Date;

/**
 * Small client for the {@code planetariumdata} endpoint of theskylive.com.
 *
 * <p>It requests data for a list of celestial objects for the current date and
 * returns the response body parsed as JSON.
 */
public class Planetarium {
    /** Endpoint URL; query parameters are appended directly after the {@code ?}. */
    private static final String ENDPOINT = "https://theskylive.com/planetariumdata?";

    /** Query key {@code aobj[]} in its URL-encoded form. */
    private static final String OBJECT_PARAM = "aobj%5B%5D=";

    // HTTP client used for every request made through this instance.
    OkHttpClient client = new OkHttpClient();

    /**
     * Fetches planetarium data for the given objects for today's date.
     *
     * @param objects object identifiers to request (e.g. {@code "moon"}, {@code "mars"});
     *                each one is sent as a separate {@code aobj[]} query parameter
     * @return the response body parsed into a {@link JSONObject}
     * @throws IOException if the request fails, the server answers with a
     *                     non-2xx status, or the response has no body
     * @throws IllegalArgumentException if any element of {@code objects} is {@code null}
     */
    JSONObject get(String... objects) throws IOException {
        StringBuilder urlBuilder = new StringBuilder(ENDPOINT);

        // Current date as ISO yyyy-MM-dd. LocalDate is locale-independent, whereas
        // SimpleDateFormat with the default locale may produce a non-Gregorian year
        // or non-ASCII digits on some systems.
        urlBuilder.append("date=").append(LocalDate.now());

        // One aobj[] parameter per requested object.
        for (String obj : objects) {
            if (obj == null) {
                throw new IllegalArgumentException("object identifier must not be null");
            }
            // Encode the value so characters such as '&', '=' or spaces cannot
            // corrupt the query string.
            urlBuilder.append('&')
                    .append(OBJECT_PARAM)
                    .append(URLEncoder.encode(obj, StandardCharsets.UTF_8.name()));
        }

        Request request = new Request.Builder()
                .url(urlBuilder.toString())
                .build();

        try (Response response = client.newCall(request).execute()) {
            // Fail with a clear message instead of handing an error page to the JSON parser.
            if (!response.isSuccessful()) {
                throw new IOException(
                        "Unexpected HTTP status " + response.code() + " for " + request.url());
            }
            if (response.body() == null) {
                throw new IOException("Empty response body for " + request.url());
            }
            String json = response.body().string();
            return new JSONObject(json);
        }
    }

    /**
     * Demo entry point: requests data for the Moon and Mars and prints the
     * JSON response indented by two spaces.
     *
     * @param args unused
     * @throws IOException if the request fails
     */
    public static void main(String[] args) throws IOException {
        Planetarium planetarium = new Planetarium();
        JSONObject response = planetarium.get("moon", "mars");
        System.out.println(response.toString(2));
    }
}

输出:

{
  "utc_seconds": 1551816600,
  "utc_timestamp": "201903052010",
  "objects": {
    "moon": {
      "distsun": 1.479847408587E8,
      "altitude": -32.421642244539,
      "dec": -12.501182812768,
      "constell": "Cap",
      "timezone": "Europe/London",
      "hlat": "-0.0075",
      "hlong": "163.9072",
      "elongation": "9.6",
      "lastdate": "2019-Mar-05 00:00",
      "hx": -0.95427043393163,
      "hy": 0.26061067578779,
      "mag": "-4.82",
      "hlongRad": 2.8607203077248,
      "hz": -1.6343451194632E-4,
      "utc_time": 1551816600,
      "distearth": 405722.20937018,
      "sot": 350.29647638889,
      "id": "moon",
      "circumstances": {
        "transit_local": 11.428494722983,
        "raise_ut": 1.5517668981849E9,
        "set": 16.623858118962,
        "raise_local": 6.3606069281934,
        "visibility": "partial",
        "azimuth_set": 256.90380469917,
        "LSTs": 3.4997935653561,
        "LSTr": 17.208442522882,
        "set_local": 16.623858118962,
        "azimuth_rise": 104.50312047906,
        "GSTs": 3.4997935653561,
        "GSTr": 17.208442522882,
        "transit_ut": 1.551785142581E9,
        "transit": 11.428494722983,
        "raise": 6.3606069281934,
        "set_ut": 1.5518038458892E9,
        "transit_height": 24.710020581601
      },
      "ar": 22.578738425926,
      "name": "Moon",
      "category": "planets",
      "hlatRad": -1.3089969389957E-4,
      "age": 27,
      "status": true
    },
    "mars": {
      "distsun": 2.2963710671492E8,
      "altitude": 27.808183248664,
      "circumstances": {
        "transit_local": 15.80120694427,
        "raise_ut": 1.5517741680418E9,
        "set": 23.222402283833,
        "raise_local": 8.3800116047075,
        "visibility": "partial",
        "azimuth_set": 286.34760861411,
        "LSTs": 10.11640394619,
        "LSTr": 19.233376146402,
        "set_local": 23.222402283833,
        "azimuth_rise": 73.652391385888,
        "GSTs": 10.11640394619,
        "GSTr": 19.233376146402,
        "transit_ut": 1.551800884345E9,
        "transit": 15.80120694427,
        "raise": 8.3800116047075,
        "set_ut": 1.5518276006482E9,
        "transit_height": 54.867608614112
      },
      "dec": 16.347608614112,
      "constell": "Ari",
      "timezone": "Europe/London",
      "hlat": "0.8142",
      "hlong": "75.6345",
      "elongation": "58.1",
      "lastdate": "2019-Mar-05 00:00",
      "hx": 0.36958631955143,
      "ar": 2.6748900462963,
      "hy": 1.4897081109635,
      "mag": "1.23",
      "hlongRad": 1.3200710530997,
      "hz": 0.022145899657793,
      "utc_time": 1551816600,
      "distearth": 2.704192732295E8,
      "name": "Mars",
      "sot": 58.1002,
      "id": "mars",
      "category": "planets",
      "hlatRad": 0.014210470769738,
      "status": true
    },
    "sun": {
      "distsun": 0,
      "altitude": -22.992657046501,
      "circumstances": {
        "transit_local": 12.176106019167,
        "raise_ut": 1.551767861711E9,
        "set": 17.739026911053,
        "raise_local": 6.6282530456618,
        "visibility": "partial",
        "azimuth_set": 263.93596334029,
        "LSTs": 4.618015588543,
        "LSTr": 17.476821431166,
        "set_local": 17.739026911053,
        "azimuth_rise": 96.242086753282,
        "GSTs": 4.618015588543,
        "GSTr": 17.476821431166,
        "transit_ut": 1.5517878339817E9,
        "transit": 12.176106019167,
        "raise": 6.6282530456618,
        "set_ut": 1.5518078604969E9,
        "transit_height": 32.366908597329
      },
      "dec": -6.0242450863769,
      "constell": "Aqr",
      "timezone": "Europe/London",
      "hlat": "n.a.",
      "hlong": "n.a.",
      "elongation": 0,
      "lastdate": "2019-Mar-05 00:00",
      "hx": 0,
      "ar": 23.060617283951,
      "hy": 0,
      "mag": "-26.76",
      "hlongRad": null,
      "hz": 0,
      "utc_time": 1551816600,
      "distearth": 1.4838474994878E8,
      "name": "Sun",
      "sot": 0,
      "id": "sun",
      "category": "planets",
      "hlatRad": null,
      "status": true
    }
  },
  "target": "sun"
}

推荐阅读