最近一個項目中需要用到過往的天氣數據, 我找到了天氣後報這個網站 (www.tianqihoubao.com), 並在 Spider Studio (SS) 中完成了相關採集, 和大家分享一下.
首先分析這個網站提供了兩種信息:
1. 省市關係
2. 天氣記錄
對應的我們建立數據結構:
/// <summary>A municipality / province / special region listed on the site's index page.</summary>
[Serializable]
public class Province
{
    /// <summary>Display name of the province.</summary>
    public string ProvinceName;

    /// <summary>URL of the province's page.</summary>
    public string ProvinceUrl;
}

/// <summary>A city that belongs to a <see cref="Province"/>.</summary>
[Serializable]
public class City
{
    /// <summary>The province this city is listed under.</summary>
    public Province Province;

    /// <summary>Display name of the city.</summary>
    public string CityName;

    /// <summary>URL of the city's page.</summary>
    public string CityUrl;
}

/// <summary>One entry in a city's list of historical weather records.</summary>
[Serializable]
public class WeatherDataSet
{
    /// <summary>The city this record set belongs to.</summary>
    public City City;

    /// <summary>Title of the record set.</summary>
    public string Title;

    /// <summary>URL of the page holding the records.</summary>
    public string Url;
}

/// <summary>One weather record taken from a <see cref="WeatherDataSet"/> page.</summary>
[Serializable]
public class WeatherData
{
    /// <summary>The record set this record was read from.</summary>
    public WeatherDataSet DataSet;

    /// <summary>Date of the record, kept as text.</summary>
    public string Date;

    /// <summary>Textual weather description.</summary>
    public string TextWeather;

    /// <summary>Temperature, kept as text.</summary>
    public string Temp;

    /// <summary>Wind information, kept as text.</summary>
    public string Wind;
}
>> 網站一共提供34個直轄市/省/特區的天氣信息, 完整的列表在: http://www.tianqihoubao.com/lishi/index.htm
對應的採集語句是:
// Select every anchor nested under a DT element inside #content (the province links on the index page).
var list = Default.SelectNodes("#content DT a");
>> 每一個省都有下轄的城市列表, 如: http://www.tianqihoubao.com/lishi/hebei.htm
對應的採集語句是:
// Select every anchor nested under a DD element inside #content (the city links on a province page).
var list = Default.SelectNodes("#content DD a");
>> 每一個城市都有一個歷史天氣記錄列表, 如: http://www.tianqihoubao.com/lishi/shijiazhuang.html
對應的採集語句是:
// Select every anchor inside a div.pcity that is a direct child of #content (the history-record links on a city page).
Default.SelectNodes("#content>div.pcity a");
>> 進入每條歷史天氣記錄, 就可以得到當月的天氣數據了:
對應的採集語句是:
// All rows of the table.b directly under #content except the first one
// (the first row is presumably the column header).
var rows = Default.SelectNodes("#content>table.b tr:gt(0)");

foreach (var row in rows)
{
    // Cells are read by position: 0 = date, 1 = weather text, 2 = temperature, 3 = wind.
    var date = row.SelectSingleNode("td:eq(0)").Text();
    var textWeather = row.SelectSingleNode("td:eq(1)").Text();
    var temp = row.SelectSingleNode("td:eq(2)").Text();
    var wind = row.SelectSingleNode("td:eq(3)").Text();
}
將這些語句分別包裝爲方法, 並將結果綁定到最開始定義的數據結構中:
// Gets the municipalities / provinces / special regions.
public List<Province> GetProvinceList() {...}

// Gets the list of cities of the given province.
public List<City> GetCityList(Province province) {...}

// Gets the historical weather record sets of the given city.
public List<WeatherDataSet> GetWeatherDataSet(City city) {...}

// Gets the historical weather data of the given record set.
public List<WeatherData> GetWeatherData(WeatherDataSet ds) {...}
>> 完整的腳本: (複製到SS中即可直接運行)
SS下載地址爲: http://www.gdtsearch.com/products.spiderstudio.docapi.htm
/// <summary>
/// Demo entry point. Scrapes the province list, then the cities of the second
/// province, then the record sets of the second city, then the weather data of
/// the first record set, logging every item along the way.
/// Stops early with a log message when a level returns too few items to drill
/// into, instead of failing with an index-out-of-range exception.
/// </summary>
public void Run()
{
    Logger.ClearAll();
    Default.ScriptErrorsSuppressed = true;

    var pl = GetProvinceList();
    foreach (var p in pl)
    {
        Logger.Log(p.ProvinceName);
        Logger.Log(p.ProvinceUrl);
    }

    // The demo drills into pl[1]; make sure that entry exists.
    if (pl.Count < 2)
    {
        Logger.Log("Fewer than two provinces were found; stopping.");
        return;
    }

    var cl = GetCityList(pl[1]);
    foreach (var c in cl)
    {
        Logger.Log(c.Province.ProvinceName);
        Logger.Log(c.Province.ProvinceUrl);
        Logger.Log(c.CityName);
        Logger.Log(c.CityUrl);
    }

    // The demo drills into cl[1]; make sure that entry exists.
    if (cl.Count < 2)
    {
        Logger.Log("Fewer than two cities were found; stopping.");
        return;
    }

    var ds = GetWeatherDataSet(cl[1]);
    foreach (var d in ds)
    {
        Logger.Log(d.City.CityName);
        Logger.Log(d.Title);
        Logger.Log(d.Url);
    }

    // The demo drills into ds[0]; make sure that entry exists.
    if (ds.Count < 1)
    {
        Logger.Log("No weather record sets were found; stopping.");
        return;
    }

    var dl = GetWeatherData(ds[0]);
    foreach (var d in dl)
    {
        Logger.Log(d.DataSet.Title);
        Logger.Log(d.Date);
        Logger.Log(d.TextWeather);
        Logger.Log(d.Temp);
        Logger.Log(d.Wind);
    }
}

/// <summary>
/// Navigates to the index page and reads one <see cref="Province"/> per
/// anchor matched by "#content DT a". Anchors without an href are skipped.
/// </summary>
/// <returns>The provinces found, with absolute URLs; empty if none matched.</returns>
public List<Province> GetProvinceList()
{
    Default.Navigate("http://www.tianqihoubao.com/lishi/index.htm");
    // NOTE(review): Ready presumably waits until the selector matches - confirm against the SS docs.
    Default.Ready("#content DT");

    var result = new List<Province>();
    foreach (var item in Default.SelectNodes("#content DT a"))
    {
        string href = item.Attr("href");
        if (string.IsNullOrEmpty(href))
        {
            // Assumes Attr yields null/empty for a missing attribute; new Uri(base, null) would throw.
            continue;
        }

        var p = new Province();
        p.ProvinceName = item.Text();
        p.ProvinceUrl = ResolveUrl(href);
        result.Add(p);
    }
    return result;
}

/// <summary>
/// Navigates to the province page and reads one <see cref="City"/> per anchor
/// matched by "#content DD a". Anchors without an href are skipped.
/// </summary>
/// <param name="province">Province whose <c>ProvinceUrl</c> is opened; stored on every returned city.</param>
/// <returns>The cities found, with absolute URLs; empty if none matched.</returns>
public List<City> GetCityList(Province province)
{
    Default.Navigate(province.ProvinceUrl);
    Default.Ready("#content DD");

    var result = new List<City>();
    foreach (var item in Default.SelectNodes("#content DD a"))
    {
        string href = item.Attr("href");
        if (string.IsNullOrEmpty(href))
        {
            continue;
        }

        var c = new City();
        c.Province = province;
        c.CityName = item.Text();
        c.CityUrl = ResolveUrl(href);
        result.Add(c);
    }
    return result;
}

/// <summary>
/// Navigates to the city page and reads one <see cref="WeatherDataSet"/> per
/// anchor matched by "#content>div.pcity a". Anchors without an href are skipped.
/// </summary>
/// <param name="city">City whose <c>CityUrl</c> is opened; stored on every returned record set.</param>
/// <returns>The record sets found, with absolute URLs; empty if none matched.</returns>
public List<WeatherDataSet> GetWeatherDataSet(City city)
{
    Default.Navigate(city.CityUrl);
    Default.Ready("#content>div.pcity");

    var result = new List<WeatherDataSet>();
    foreach (var item in Default.SelectNodes("#content>div.pcity a"))
    {
        string href = item.Attr("href");
        if (string.IsNullOrEmpty(href))
        {
            continue;
        }

        var ds = new WeatherDataSet();
        ds.Title = item.Text();
        ds.Url = ResolveUrl(href);
        ds.City = city;
        result.Add(ds);
    }
    return result;
}

/// <summary>
/// Navigates to the record-set page and reads one <see cref="WeatherData"/> per
/// table row matched by "#content>table.b tr:gt(0)" (every row but the first).
/// Rows that do not have all four expected cells are skipped.
/// </summary>
/// <param name="ds">Record set whose <c>Url</c> is opened; stored on every returned record.</param>
/// <returns>The records found; empty if no complete row matched.</returns>
public List<WeatherData> GetWeatherData(WeatherDataSet ds)
{
    Default.Navigate(ds.Url);
    Default.Ready("#content>table.b");

    var result = new List<WeatherData>();
    foreach (var item in Default.SelectNodes("#content>table.b tr:gt(0)"))
    {
        // Cells by position: 0 = date, 1 = weather text, 2 = temperature, 3 = wind.
        var dateCell = item.SelectSingleNode("td:eq(0)");
        var weatherCell = item.SelectSingleNode("td:eq(1)");
        var tempCell = item.SelectSingleNode("td:eq(2)");
        var windCell = item.SelectSingleNode("td:eq(3)");

        // Assumes SelectSingleNode yields null when nothing matches - TODO confirm.
        // Skipping incomplete rows avoids a NullReferenceException on .Text().
        if (dateCell == null || weatherCell == null || tempCell == null || windCell == null)
        {
            continue;
        }

        var d = new WeatherData();
        d.DataSet = ds;
        d.Date = dateCell.Text();
        d.TextWeather = weatherCell.Text();
        d.Temp = tempCell.Text();
        d.Wind = windCell.Text();
        result.Add(d);
    }
    return result;
}

/// <summary>Resolves <paramref name="href"/> against the URL of the currently loaded page.</summary>
/// <param name="href">A non-null relative or absolute link.</param>
/// <returns>The absolute URL as a string.</returns>
private string ResolveUrl(string href)
{
    return new Uri(Default.Url, href).ToString();
}

/// <summary>A municipality / province / special region listed on the index page.</summary>
[Serializable]
public class Province
{
    public string ProvinceName;
    public string ProvinceUrl;
}

/// <summary>A city listed on a province page.</summary>
[Serializable]
public class City
{
    public Province Province;
    public string CityName;
    public string CityUrl;
}

/// <summary>One historical-record link listed on a city page.</summary>
[Serializable]
public class WeatherDataSet
{
    public City City;
    public string Title;
    public string Url;
}

/// <summary>One table row read from a record-set page; all values are kept as text.</summary>
[Serializable]
public class WeatherData
{
    public WeatherDataSet DataSet;
    public string Date;
    public string TextWeather;
    public string Temp;
    public string Wind;
}
>> 運行效果: