這篇文章主要展示如何用C#語言抓取網站中的圖片。實現原理就是基於HTTP請求。C#給咱們提供了HttpWebRequest和WebClient兩個對象,方便發送請求獲取數據,下面看如何實現。
1,HttpGetAction方法。用於發送請求獲取數據後,處理字符串以獲得圖片地址。
/// <summary>
/// Downloads the page at <paramref name="url"/>, extracts the <c>src</c> of every
/// <c>img</c> tag and hands each absolute http(s) image URL (SVG files excluded)
/// to <c>HttpGetImg</c> for download.
/// </summary>
/// <param name="url">Address of the page to crawl.</param>
/// <param name="path">Target directory, passed through unchanged to <c>HttpGetImg</c>.</param>
/// <param name="name">Number used as the file name of the first image; incremented for each following image.</param>
public static void HttpGetAction(string url, string path, int name)
{
    Stopwatch sw = new Stopwatch();
    sw.Start();
    Console.WriteLine("抓取地址:" + url);

    string result = string.Empty;
    HttpWebRequest webRequest = WebRequest.CreateHttp(url);
    webRequest.Method = "GET";

    // Dispose the response together with its stream so the underlying
    // connection is released instead of leaking until finalization.
    using (WebResponse response = webRequest.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
    {
        result = reader.ReadToEnd();
    }

    if (string.IsNullOrEmpty(result))
    {
        Console.WriteLine("請求地址錯誤");
        Console.ReadKey();
        return;
    }

    // Extract the src address of every img tag.
    Regex regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);
    MatchCollection matches = regImg.Matches(result);

    // Number of images handed to the downloader.
    int i = 0;

    // One WebClient is shared by all downloads and disposed when the loop ends.
    using (WebClient web = new WebClient())
    {
        foreach (Match match in matches)
        {
            string imgsrc = match.Groups["imgUrl"].Value;

            // Only absolute http/https addresses can be downloaded as-is; a prefix
            // check avoids accepting relative paths that merely contain "http".
            bool isHttp = imgsrc.StartsWith("http", StringComparison.OrdinalIgnoreCase);
            bool isSvg = imgsrc.IndexOf(".svg", StringComparison.OrdinalIgnoreCase) >= 0;
            if (isHttp && !isSvg)
            {
                i++;
                HttpGetImg(web, imgsrc, path, name);
                name++; // next image file name
            }
        }
    }

    sw.Stop();
    Console.WriteLine("爬取完成!總共爬取了" + i + "張圖片!");
    Console.WriteLine("爬取圖片耗時:" + sw.ElapsedMilliseconds / 1000 + "秒");
}
2,HttpGetImg方法。下載圖片到指定目錄。
/// <summary>
/// Downloads a single image from <paramref name="src"/> into the directory
/// <paramref name="path"/>, saving it as "<paramref name="name"/>.jpg".
/// </summary>
/// <param name="web">Client used to perform the download.</param>
/// <param name="src">Address of the image to download.</param>
/// <param name="path">Existing target directory; if it does not exist, an error is printed and nothing is downloaded.</param>
/// <param name="name">Number used as the file name (without extension).</param>
public static void HttpGetImg(WebClient web, string src, string path, int name)
{
    Console.WriteLine("爬取圖片:" + src);
    if (!Directory.Exists(path))
    {
        Console.WriteLine("路徑錯誤!");
        Console.ReadKey();
        return;
    }

    string fileName = name + ".jpg";
    // Path.Combine inserts the directory separator when path lacks a trailing one;
    // plain concatenation would write "<dir><name>.jpg" next to the directory instead.
    web.DownloadFile(src, Path.Combine(path, fileName));
    Console.WriteLine("爬取圖片成功:" + fileName);
}
3,控制檯調用
/// <summary>
/// Console entry point: crawls the configured page, stores its images in the
/// configured directory starting at file name 1, then waits for a key press.
/// </summary>
static void Main(string[] args)
{
    // File name (number) given to the first downloaded image.
    const int firstImageName = 1;

    string outputDir = Path.Combine(@"D:\word 資料\img\冬天\");
    string pageUrl = "https://www.xxxxxx.com/";

    HttpHelper.HttpGetAction(pageUrl, outputDir, firstImageName);

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}
效果圖:
一個簡單的C#爬蟲程序就完成了。若有錯誤的地方,還望大神指點。
原文來自:一個簡單的C#程序-曾亞平我的博客