分享一個天氣歷史數據的採集腳本

最近一個項目中需要用到過往的天氣數據, 我找到了天氣後報這個網站 (www.tianqihoubao.com), 並在 SS (Spider Studio) 中完成了相關採集, 和大家分享一下.

首先分析一下, 這個網站提供了兩種信息:

1. 省市關係數據結構

2. 天氣記錄

對應地, 我們建立以下數據結構:

/// <summary>
/// A province-level entry (municipality / province / special region) scraped from the site's history index.
/// </summary>
[Serializable]
public class Province
{
    /// <summary>Link text of the province entry.</summary>
    public string ProvinceName;
    /// <summary>Absolute URL of the province's page (the link's href resolved against the current page URL).</summary>
    public string ProvinceUrl;
}

/// <summary>
/// A city entry scraped from a province page.
/// </summary>
[Serializable]
public class City
{
    /// <summary>The province whose page this city was listed on.</summary>
    public Province Province;
    /// <summary>Link text of the city entry.</summary>
    public string CityName;
    /// <summary>Absolute URL of the city's history index page.</summary>
    public string CityUrl;
}

/// <summary>
/// One entry in a city's list of historical weather records; it links to a page of daily records.
/// </summary>
[Serializable]
public class WeatherDataSet
{
    /// <summary>The city this record set belongs to.</summary>
    public City City;
    /// <summary>Link text of the record-set entry.</summary>
    public string Title;
    /// <summary>Absolute URL of the page holding the daily records.</summary>
    public string Url;
}

/// <summary>
/// One table row of a weather history page. All values are kept as the raw cell text.
/// </summary>
[Serializable]
public class WeatherData
{
    /// <summary>The record set (history page) this row was read from.</summary>
    public WeatherDataSet DataSet;
    /// <summary>Text of the first cell (date).</summary>
    public string Date;
    /// <summary>Text of the second cell (weather description).</summary>
    public string TextWeather;
    /// <summary>Text of the third cell (temperature).</summary>
    public string Temp;
    /// <summary>Text of the fourth cell (wind).</summary>
    public string Wind;
}

 

>> 網站一共提供34個直轄市/省/特區的天氣信息, 完整的列表在: http://www.tianqihoubao.com/lishi/index.htm

對應的採集語句是:

// Select every province link: the <a> elements under the DT elements inside #content.
var list = Default.SelectNodes("#content DT a");

 

 

>> 每一個省都有下轄的城市列表, 如: http://www.tianqihoubao.com/lishi/hebei.htm

對應的採集語句是:

// Select every city link: the <a> elements under the DD elements inside #content.
var list = Default.SelectNodes("#content DD a");

 

 

>> 每一個城市都有一個歷史天氣記錄列表, 如: http://www.tianqihoubao.com/lishi/shijiazhuang.html

對應的採集語句是:

// Select every history-record link inside the div.pcity blocks that are direct children of #content.
// The result is kept in a variable so it can be iterated, matching the other snippets.
var list = Default.SelectNodes("#content>div.pcity a");

 

 

>> 進入每條歷史天氣記錄, 就能夠獲得當月的天氣數據了:

對應的採集語句是:

// Walk every row of the history table except the first one (tr:gt(0) skips row index 0)
// and read the four cells of each row as text.
foreach (var row in Default.SelectNodes("#content>table.b tr:gt(0)"))
{
    var date = row.SelectSingleNode("td:eq(0)").Text();        // 1st cell: date
    var textWeather = row.SelectSingleNode("td:eq(1)").Text(); // 2nd cell: weather description
    var temp = row.SelectSingleNode("td:eq(2)").Text();        // 3rd cell: temperature
    var wind = row.SelectSingleNode("td:eq(3)").Text();        // 4th cell: wind
}

 

 

將這些語句分別包裝爲方法, 並將結果綁定到最開始定義的數據結構中:

public List<Province> GetProvinceList() {...} // Gets the municipalities / provinces / special regions
public List<City> GetCityList(Province province) {...} // Gets the list of cities of a province
public List<WeatherDataSet> GetWeatherDataSet(City city) {...} // Gets the weather history record sets of the given city
public List<WeatherData> GetWeatherData(WeatherDataSet ds) {...} // Gets the historical weather data of a record set

 

 

>> 完整的腳本: (複製到SS中便可直接運行)

SS下載地址爲: http://www.gdtsearch.com/products.spiderstudio.docapi.htm

/// <summary>
/// Demo entry point. Walks a single path through the site and logs everything it scrapes:
/// all provinces, the cities of the second province, the history record sets of that
/// province's second city, and the daily rows of that city's first record set.
/// Stops early (with a log message) when a page yields too few entries to continue,
/// instead of failing with an index-out-of-range exception.
/// </summary>
public void Run()
{
    Logger.ClearAll();
    Default.ScriptErrorsSuppressed = true;

    var pl = GetProvinceList();
    foreach(var p in pl)
    {
        Logger.Log(p.ProvinceName);
        Logger.Log(p.ProvinceUrl);
    }
    // The demo drills into the second province (index 1); make sure it exists.
    if (pl.Count < 2)
    {
        Logger.Log("Fewer than two provinces were found; stopping.");
        return;
    }

    var cl = GetCityList(pl[1]);
    foreach(var c in cl)
    {
        Logger.Log(c.Province.ProvinceName);
        Logger.Log(c.Province.ProvinceUrl);
        Logger.Log(c.CityName);
        Logger.Log(c.CityUrl);
    }
    // Likewise the second city (index 1) of that province.
    if (cl.Count < 2)
    {
        Logger.Log("Fewer than two cities were found; stopping.");
        return;
    }

    var ds = GetWeatherDataSet(cl[1]);
    foreach(var d in ds)
    {
        Logger.Log(d.City.CityName);
        Logger.Log(d.Title);
        Logger.Log(d.Url);
    }
    // Only the first record set (index 0) is opened.
    if (ds.Count < 1)
    {
        Logger.Log("No weather record sets were found; stopping.");
        return;
    }

    var dl = GetWeatherData(ds[0]);
    foreach(var d in dl)
    {
        Logger.Log(d.DataSet.Title);
        Logger.Log(d.Date);
        Logger.Log(d.TextWeather);
        Logger.Log(d.Temp);
        Logger.Log(d.Wind);
    }
}


/// <summary>
/// Opens the site's history index page and collects one <see cref="Province"/> per province link.
/// </summary>
/// <returns>The provinces in the order their links are returned by the selector.</returns>
public List<Province> GetProvinceList()
{
    Default.Navigate("http://www.tianqihoubao.com/lishi/index.htm");
    // NOTE(review): Ready presumably waits until the selector matches — confirm against the SS API.
    Default.Ready("#content DT");

    var provinces = new List<Province>();
    foreach (var link in Default.SelectNodes("#content DT a"))
    {
        var name = link.Text();
        var href = link.Attr("href");
        provinces.Add(new Province
        {
            ProvinceName = name,
            // Resolve the href against the current page URL to get an absolute address.
            ProvinceUrl = new Uri(Default.Url, href).ToString()
        });
    }
    return provinces;
}

/// <summary>
/// Opens a province page and collects one <see cref="City"/> per city link.
/// </summary>
/// <param name="province">The province to open; its <c>ProvinceUrl</c> is navigated to and it is attached to every returned city.</param>
/// <returns>The cities in the order their links are returned by the selector.</returns>
public List<City> GetCityList(Province province)
{
    Default.Navigate(province.ProvinceUrl);
    // NOTE(review): Ready presumably waits until the selector matches — confirm against the SS API.
    Default.Ready("#content DD");

    var cities = new List<City>();
    foreach (var link in Default.SelectNodes("#content DD a"))
    {
        var name = link.Text();
        var href = link.Attr("href");
        cities.Add(new City
        {
            Province = province,
            CityName = name,
            // Resolve the href against the current page URL to get an absolute address.
            CityUrl = new Uri(Default.Url, href).ToString()
        });
    }
    return cities;
}

/// <summary>
/// Opens a city's history index page and collects one <see cref="WeatherDataSet"/> per record link.
/// </summary>
/// <param name="city">The city to open; its <c>CityUrl</c> is navigated to and it is attached to every returned record set.</param>
/// <returns>The record sets in the order their links are returned by the selector.</returns>
public List<WeatherDataSet> GetWeatherDataSet(City city)
{
    Default.Navigate(city.CityUrl);
    // NOTE(review): Ready presumably waits until the selector matches — confirm against the SS API.
    Default.Ready("#content>div.pcity");

    var recordSets = new List<WeatherDataSet>();
    foreach (var link in Default.SelectNodes("#content>div.pcity a"))
    {
        var title = link.Text();
        var href = link.Attr("href");
        recordSets.Add(new WeatherDataSet
        {
            Title = title,
            // Resolve the href against the current page URL to get an absolute address.
            Url = new Uri(Default.Url, href).ToString(),
            City = city
        });
    }
    return recordSets;
}

/// <summary>
/// Opens a weather history page and turns each table row after the first into a <see cref="WeatherData"/>.
/// </summary>
/// <param name="ds">The record set to open; its <c>Url</c> is navigated to and it is attached to every returned row.</param>
/// <returns>One entry per selected row, holding the raw text of the row's first four cells.</returns>
public List<WeatherData> GetWeatherData(WeatherDataSet ds)
{
    Default.Navigate(ds.Url);
    // NOTE(review): Ready presumably waits until the selector matches — confirm against the SS API.
    Default.Ready("#content>table.b");

    var records = new List<WeatherData>();
    // tr:gt(0) skips the first row of the table.
    foreach (var row in Default.SelectNodes("#content>table.b tr:gt(0)"))
    {
        records.Add(new WeatherData
        {
            DataSet = ds,
            Date = row.SelectSingleNode("td:eq(0)").Text(),
            TextWeather = row.SelectSingleNode("td:eq(1)").Text(),
            Temp = row.SelectSingleNode("td:eq(2)").Text(),
            Wind = row.SelectSingleNode("td:eq(3)").Text()
        });
    }
    return records;
}

/// <summary>
/// A province-level entry (municipality / province / special region) scraped from the site's history index.
/// </summary>
[Serializable]
public class Province
{
    /// <summary>Link text of the province entry.</summary>
    public string ProvinceName;
    /// <summary>Absolute URL of the province's page (the link's href resolved against the current page URL).</summary>
    public string ProvinceUrl;
}

/// <summary>
/// A city entry scraped from a province page.
/// </summary>
[Serializable]
public class City
{
    /// <summary>The province whose page this city was listed on.</summary>
    public Province Province;
    /// <summary>Link text of the city entry.</summary>
    public string CityName;
    /// <summary>Absolute URL of the city's history index page.</summary>
    public string CityUrl;
}

/// <summary>
/// One entry in a city's list of historical weather records; it links to a page of daily records.
/// </summary>
[Serializable]
public class WeatherDataSet
{
    /// <summary>The city this record set belongs to.</summary>
    public City City;
    /// <summary>Link text of the record-set entry.</summary>
    public string Title;
    /// <summary>Absolute URL of the page holding the daily records.</summary>
    public string Url;
}

/// <summary>
/// One table row of a weather history page. All values are kept as the raw cell text.
/// </summary>
[Serializable]
public class WeatherData
{
    /// <summary>The record set (history page) this row was read from.</summary>
    public WeatherDataSet DataSet;
    /// <summary>Text of the first cell (date).</summary>
    public string Date;
    /// <summary>Text of the second cell (weather description).</summary>
    public string TextWeather;
    /// <summary>Text of the third cell (temperature).</summary>
    public string Temp;
    /// <summary>Text of the fourth cell (wind).</summary>
    public string Wind;
}
View Code

 

 

>> 運行效果:

相關文章
相關標籤/搜索