最近在採集數據,因此自己寫了個小工具。一般的網站可以直接模擬瀏覽器發送請求,然後用正則表達式分析頁面,提取需要的信息。
// Downloads the page at `url` with browser-like request headers, then pulls
// every all-digit <div> payload out of the returned HTML with a regex.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.Method = "GET";
request.Timeout = 5000; // milliseconds
request.UserAgent = "Mozilla/5.0 (Windows NT 5.2; rv:27.0) Gecko/20100101 Firefox/27.0";
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
request.Headers.Add("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
// Let the framework send Accept-Encoding and transparently decompress the body.
// The previous code advertised "deflate" but only unpacked gzip, so a
// deflate-compressed response would have been decoded as garbage.
request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
request.KeepAlive = true;
// Referer must be a plain URI; percent-encoding the whole URL produced a value
// such as "http%3a%2f%2f..." that servers cannot interpret as a referring page.
request.Referer = url;

string html;
// Dispose the response (and its stream) so the underlying connection is
// returned to the pool even if reading throws.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (Stream responseStream = response.GetResponseStream())
// NOTE(review): the charset is hard-coded to GBK; confirm the target site
// really serves GBK, otherwise derive it from response.CharacterSet.
using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding("gbk")))
{
    html = reader.ReadToEnd();
}

// Capture group 1 is the run of digits between <div> and </div>.
string pattern = "<div>(\\d+)</div>";
Regex reg = new Regex(pattern, RegexOptions.IgnoreCase);
MatchCollection matchs = reg.Matches(html);

foreach (Match match in matchs)
{
    // The digits-only content of the matched <div>.
    string phone = match.Groups[1].Value;
}