準備:
1. 新建控制檯項目
2. 引用 System.Drawing 類庫
3. 安裝 HtmlAgilityPack 1.5.2.0
4. 如果不會 XPath 語法的話,建議先簡單看一下
代碼:
/// <summary>
/// Crawler demo entry point: downloads one page, narrows the markup down in
/// three steps (second table -> div#div_text -> img elements), then prints and
/// downloads every image whose <c>data-src</c> attribute is present.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // URL of the page to parse. It is hard-coded here, but could be turned into a parameter.
    string html = HttpHelper.DownloadHtml(@"http://wyxa.googlefilm.com.cn/wenzhang/info_386854_w821759016~i5.html", Encoding.UTF8);
    HtmlDocument doc = new HtmlDocument();

    // The author could not write a single XPath expression for the target page,
    // so the document is narrowed step by step: first locate the outer table.
    doc.LoadHtml(html);
    string pageNumberPath = @"//table[2]";
    HtmlNode pageNumberNode = doc.DocumentNode.SelectSingleNode(pageNumberPath);
    if (pageNumberNode == null)
    {
        // SelectSingleNode yields null when nothing matches; stop instead of crashing.
        Console.Error.WriteLine("No node matched XPath: " + pageNumberPath);
        return;
    }

    // Re-load only the table's inner markup and look for the content div inside it.
    doc.LoadHtml(pageNumberNode.InnerHtml);
    pageNumberPath = "//div[@id='div_text']";
    pageNumberNode = doc.DocumentNode.SelectSingleNode(pageNumberPath);
    if (pageNumberNode == null)
    {
        Console.Error.WriteLine("No node matched XPath: " + pageNumberPath);
        return;
    }

    // Same again: re-load the div's inner markup and collect its img elements.
    doc.LoadHtml(pageNumberNode.InnerHtml);
    pageNumberPath = "//img";
    HtmlNodeCollection pageNumberNodes = doc.DocumentNode.SelectNodes(pageNumberPath);
    if (pageNumberNodes == null)
    {
        // SelectNodes returns null (not an empty collection) when nothing matches.
        Console.Error.WriteLine("No node matched XPath: " + pageNumberPath);
        return;
    }

    // Walk the img elements and read the image address from data-src.
    foreach (var item in pageNumberNodes)
    {
        var dataSrc = item.Attributes["data-src"];
        if (dataSrc == null)
        {
            continue;
        }

        var imgSrc = dataSrc.Value;

        // Print the address to the console.
        Console.WriteLine(imgSrc);

        // Download the image from the extracted address.
        DownloadPhotoFromUrl(imgSrc);
    }
}
HttpHelper.DownloadHtml方法
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body decoded
/// with <paramref name="encoding"/>.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="encoding">Text encoding used to decode the response body.</param>
/// <returns>The response body as a string, or an empty string when the response has no stream.</returns>
public static string DownloadHtml(string url, Encoding encoding)
{
    HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(url);
    myReq.Timeout = 24000; // HttpWebRequest.Timeout is in milliseconds.

    // Dispose the response, stream and reader so the underlying connection is
    // released even when reading throws.
    using (HttpWebResponse httpWResp = (HttpWebResponse)myReq.GetResponse())
    using (Stream myStream = httpWResp.GetResponseStream())
    {
        if (myStream == null)
        {
            return "";
        }

        using (StreamReader sr = new StreamReader(myStream, encoding))
        {
            return sr.ReadToEnd();
        }
    }
}
下載網絡圖片的方法:
/// <summary>
/// Downloads the image at the given address and saves it to the local disk
/// under <c>D:\images</c>, using a timestamp as the file name.
/// </summary>
/// <param name="Url">Address of the image to download.</param>
public static void DownloadPhotoFromUrl(string Url)
{
    HttpWebRequest webrequest = (HttpWebRequest)WebRequest.Create(Url);

    // Dispose the response so the connection is released on every path.
    using (HttpWebResponse webresponse = (HttpWebResponse)webrequest.GetResponse())
    {
        if (webresponse.StatusCode != HttpStatusCode.OK)
        {
            return;
        }

        // The stream is kept open for as long as the image is in use and is
        // disposed after the image.
        using (System.IO.Stream responseStream = webresponse.GetResponseStream())
        using (System.Drawing.Image image = System.Drawing.Image.FromStream(responseStream))
        {
            // Make sure the target folder exists before saving into it.
            string directory = @"D:\images";
            System.IO.Directory.CreateDirectory(directory);

            // NOTE(review): names have 1/100 s resolution, so two downloads finishing
            // within the same tick would overwrite each other - confirm this is acceptable.
            string fileName = DateTime.Now.ToString("yyyyMMddHHmmssff") + ".jpg";

            // Save into the local folder.
            image.Save(System.IO.Path.Combine(directory, fileName));
        }
    }
}
至此,功能完成。
以下爲本人控制檯打印結果:
圖片下載截圖:
本人菜鳥,只是爲了記錄學習中的小知識點,請大神勿噴~