1、查看代碼:
namespace ToutiaoSpider
{
    /// <summary>
    /// Console spider that periodically loads a Toutiao article-information URL through a
    /// headless browser (with a cookie attached) and dumps the resulting page source to disk.
    /// </summary>
    class Program
    {
        /// <summary>Directory that receives one HTML dump per polling cycle.</summary>
        private const string DumpDirectory = "C:\\TempFiles\\";

        /// <summary>Pause between two polling cycles (five minutes).</summary>
        private const int PollIntervalMilliseconds = 5 * 60 * 1000;

        /// <summary>Pause between two navigation attempts inside one cycle.</summary>
        private const int RetryDelayMilliseconds = 1000;

        /// <summary>Initial value of the retry counter; the page load is attempted up to MaxRetries + 1 times.</summary>
        private const int MaxRetries = 6;

        /// <summary>
        /// Endless polling loop: load the page, write it to <see cref="DumpDirectory"/>, sleep, repeat.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var db = Db.GetDataHelper();
            db.CreateOrUpdateTable(typeof(Models.ArticleMonitorModule));

            while (true)
            {
                try
                {
                    // LoadSource is inside the try so that a driver start-up or navigation
                    // failure skips this cycle instead of terminating the whole monitor.
                    var src = LoadSource();

                    // Parsing via GetItem and inserting into the database is currently
                    // disabled; the raw page is dumped to disk for inspection instead.

                    // CreateDirectory is a no-op when the directory already exists.
                    Directory.CreateDirectory(DumpDirectory);
                    var fileName = DateTime.Now.ToString("yyyy_MM_dd_HH_mm_ss") + ".html";
                    File.WriteAllText(Path.Combine(DumpDirectory, fileName), src);
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Get item error:" + ex.Message);
                }

                Thread.Sleep(PollIntervalMilliseconds);
            }
        }

        /// <summary>
        /// Extracts article statistics from a page whose text content is a JSON document of the
        /// form <c>{ "message": "success", "data": [ ... ] }</c>.
        /// </summary>
        /// <param name="src">Page source as returned by the browser.</param>
        /// <returns>
        /// One entry per element of <c>data</c>; an empty list when the source is blank, the
        /// payload is not a JSON object, <c>message</c> is not <c>"success"</c>, or <c>data</c>
        /// is absent. Never <c>null</c>.
        /// </returns>
        static List<Models.ArticleMonitorModule> GetItem(string src)
        {
            var lst = new List<ArticleMonitorModule>();
            if (string.IsNullOrWhiteSpace(src))
            {
                return lst;
            }

            // The browser wraps the JSON response in HTML; InnerText strips the markup.
            var docs = new HtmlAgilityPack.HtmlDocument();
            docs.LoadHtml(src);
            var jsonText = docs.DocumentNode.InnerText;

            // "as" instead of a hard cast: a non-object payload yields an empty result
            // rather than an InvalidCastException / NullReferenceException.
            var json = JsonConvert.DeserializeObject(jsonText) as JObject;
            if (json == null || json["message"]?.Value<string>() != "success")
            {
                return lst;
            }

            var token = json.GetValue("data");
            if (token == null)
            {
                return lst;
            }

            foreach (var data in token)
            {
                var item = new ArticleMonitorModule()
                {
                    __id = Guid.NewGuid(),
                    article_id = data["item_id"]?.Value<string>(),
                    site = "toutiao.com",
                    url = data["display_url"]?.Value<string>(),
                    comment_count = data["comments_count"]?.Value<int>(),
                    visit_count = data["go_detail_count"]?.Value<int>(),
                    // Timestamp is stored as UTC shifted by eight hours.
                    fetch_time = DateTime.UtcNow.AddHours(8),
                    like_count = null
                };
                lst.Add(item);
            }

            return lst;
        }

        /// <summary>
        /// Creates the browser driver used for one polling cycle (PhantomJS with a
        /// 120-second command timeout). A ChromeDriver or FirefoxDriver constructed the same
        /// way (service, options, timeout) can be substituted here.
        /// </summary>
        /// <returns>A new driver; the caller is responsible for disposing it.</returns>
        static IWebDriver CreateDriver()
        {
            var service = PhantomJSDriverService.CreateDefaultService();
            return new OpenQA.Selenium.PhantomJS.PhantomJSDriver(service, new PhantomJSOptions(), TimeSpan.FromSeconds(120));
        }

        /// <summary>
        /// Opens the article-information URL, attaches the <c>csrftoken</c> cookie, refreshes so
        /// the request is re-sent with the cookie, and returns the resulting page source.
        /// </summary>
        /// <returns>The page source after the last attempt, successful or not.</returns>
        static string LoadSource()
        {
            // NOTE(review): the URL embeds device identifiers and the cookie value below is
            // hard-coded; consider moving both to configuration.
            var url = "http://is.snssdk.com/2/article/information/v21/?version_code=6.1.6&app_name=news_article&vid=C5585644-2731-495E-8CF2-B42BBA4D7780&device_id=35980279488&channel=App%20Store&resolution=1125*2001&aid=13&ab_version=120431,134942,136400,126064,122834,130106,126068,128826,134127,136030,137117,136268,137571,126070,136111,116022,135623,125502,137069,125174,135489,133019,137083,126059,137452,135631,136930,122948,137474,137431,31210,133013,135290,131207,114338,133770&ab_feature=z1&openudid=f870822c71509e95ee8f58db8b1d70ce9cb14713&live_sdk_version=1.6.5&idfv=C5585644-2731-495E-8CF2-B42BBA4D7780&ac=WIFI&os_version=10.3.2&ssmix=a&device_platform=iphone&iid=11267657395&ab_client=a1,f2,f7,e1&device_type=iPhone%206S%20Plus&idfa=B1742B5B-DF14-44EF-A325-362873389ABA&aggr_type=1&article_page=0&device_id=35980279488&from_category=news_entertainment&group_id=6411002681368035586";
            var uri = new Uri(url);

            // "using" guarantees the browser process is shut down even when navigation or
            // PageSource throws; previously such a failure leaked the driver.
            using (var driver = CreateDriver())
            {
                var nav = driver.Navigate();
                var cookies = driver.Manage().Cookies;

                int retryCount = MaxRetries;
                while (true)
                {
                    try
                    {
                        // A page of the target domain must be loaded before a cookie for
                        // that domain can be added.
                        nav.GoToUrl(url);

                        try
                        {
                            cookies.AddCookie(new Cookie("csrftoken", "b9e36219cad78dfe6a1c687d6b368b52", uri.DnsSafeHost, "/", DateTime.Now.AddYears(1)));
                        }
                        catch (Exception ex)
                        {
                            // With PhantomJS, AddCookie throws although the cookie is in fact
                            // stored, so this is not treated as a failed attempt; the refresh
                            // below still sends the cookie.
                            Console.WriteLine("AddCookie reported an error (ignored):" + ex.Message);
                        }

                        // Re-issue the request, this time carrying the cookie.
                        nav.Refresh();
                        Console.WriteLine("OK!" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"));
                        break;
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("retry " + retryCount + ":" + ex.Message);
                        Thread.Sleep(RetryDelayMilliseconds);
                        if (retryCount-- <= 0)
                        {
                            break;
                        }
                    }
                }

                return driver.PageSource;
            }
        }
    }
}
2、代碼解說:
在 Main 方法中調用了 LoadSource 方法。LoadSource 方法中第 1 步先調用 CreateDriver 方法建立一個 Driver(例子中建立的是 PhantomJS 的 Driver),而後經過 driver.Navigate() 取得 Navigation 對象,再調用 GoToUrl 轉到一個 URL——必須先打開該域名下的頁面,以後才能給 driver.Manage().Cookies 添加 Cookie。添加完 Cookie 以後,調用 nav 的 Refresh 方法,便可從新發起攜帶 Cookie 的請求。不過這裏有個小問題,就是添加 Cookie 這一步:使用 Chrome 時,在 GoToUrl 以後便可直接成功添加 Cookie;可是在 PhantomJS 中,AddCookie 老是拋出異常。查看官方文檔,文檔說雖然這裏拋出異常,可是 Cookie 依然是成功添加的。因此咱們在 Catch 到異常以後,調用了一下 Refresh 去刷新。這時使用 Fiddler 檢測,請求果真攜帶了 Cookie。以上代碼只是爲了簡單快速地測試 Demo,因此寫得比較亂。
3、如圖: