HTTP Header 存放在HttpWebRequest中。
特定的HTTP Header可以手動添加:
// Manually add a specific HTTP header to the request.
requestPost.Headers.Add("X-Requested-With", "XMLHttpRequest");
//寫入POST DATA
// Encode the POST data and write it into the request body.
byte[] postdatabytes = encoding.GetBytes(postdata);
// ContentLength is deliberately left unset here, as in the original snippet:
//request.ContentLength = postdatabytes.Length;
// "using" closes the request stream even if Write throws.
using (Stream stream = request.GetRequestStream())
{
    stream.Write(postdatabytes, 0, postdatabytes.Length);
}
Cookies
Cookies分爲請求Cookies和寫入Cookies,多數由服務端寫入本地或由JS寫入本地,再提交給服務端。
Cookies多爲動態,可用於檢測爬蟲。
服務端寫入的Cookies可以從HttpWebResponse中獲取。
HttpWebRequest請求
當你解決了以上所有問題後,就需要使用HttpWebRequest來發送你的請求。
HttpWebRequest可以承載HTTP Header、Cookies和POST DATA。
HttpWebRequest可以設置請求類型(POST/GET)。
HttpWebRequest可以設置是否保持長連接。
HttpWebRequest可以設置是否跟隨目標地址跳轉。
HttpWebRequest可以創建HttpWebResponse。
// Request type: "POST" or "GET".
request.Method = "POST";
// Whether to keep the connection alive: true or false.
request.KeepAlive = true;
// Whether to follow redirects issued by the target address: true or false.
request.AllowAutoRedirect = false;
HttpWebResponse接收對象
HttpWebResponse存放了HttpWebRequest請求後接收到的Cookies
HttpWebResponse存放了HttpWebRequest請求後接收到的HTML
至此一個模擬登陸的爬蟲必備條件所有介紹完畢。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Xml;
using System.IO.Compression;
using System.Drawing;

namespace Common
{
    /// <summary>
    /// Helper around <see cref="HttpWebRequest"/> / <see cref="HttpWebResponse"/> that sends
    /// GET/POST requests, keeps the cookies of the session between calls and returns the
    /// response body as text or as a bitmap.
    /// </summary>
    public class HttpWebUtility
    {
        #region Properties
        /// <summary>
        /// Response body text stored by the most recent successful call to Request.
        /// </summary>
        public string ResultHtml
        {
            get;
            set;
        }

        /// <summary>
        /// Cookie container shared by the cookie-aware requests of this instance.
        /// A request needs a CookieContainer assigned in order to collect the cookies
        /// returned by the server; one is created on first use when this is null.
        /// </summary>
        public CookieContainer CookieContainer
        {
            get;
            set;
        }

        /// <summary>
        /// Cookie header string for the URI of the most recent cookie-aware request.
        /// </summary>
        public string CookiesString
        {
            get;
            set;
        }
        #endregion

        #region Methods
        /// <summary>
        /// Sends a POST or GET request using the shared cookie container and stores the
        /// response text in <see cref="ResultHtml"/>.
        /// </summary>
        /// <param name="request">Prepared request object carrying the headers. Must not be null.</param>
        /// <param name="isPost">true to send a POST, false to send a GET.</param>
        /// <param name="postdata">Data string to send when posting; null is treated as empty.</param>
        /// <param name="encodingName">Encoding name used for the POST data and for decoding the response; "UTF8" selects UTF-8.</param>
        /// <param name="IsGZipString">true to decompress the response stream with GZip before decoding.</param>
        /// <param name="AllowAutoRedirect">Whether the request follows redirects.</param>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
        /// <exception cref="Exception">Any failure while sending or reading; the original error is kept as InnerException.</exception>
        public void Request(HttpWebRequest request, bool isPost, string postdata = "", string encodingName = "UTF8", bool IsGZipString = false, bool AllowAutoRedirect = false)
        {
            // Guard outside the try block so the message is not wrapped a second time.
            if (request == null)
            {
                throw new ArgumentNullException("request", "HttpWebPost Error:request = NULL");
            }

            try
            {
                request.Method = isPost ? "POST" : "GET";
                request.KeepAlive = true;
                request.AllowAutoRedirect = AllowAutoRedirect;
                AttachCookieContainer(request);

                Encoding encoding = ResolveEncoding(encodingName);
                if (isPost)
                {
                    WritePostData(request, encoding.GetBytes(postdata ?? string.Empty));
                }

                // "using" releases the response (and its connection) on every path,
                // including when reading the body throws.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    CaptureCookies(request, response);
                    ResultHtml = ReadBody(response, encoding, IsGZipString);

                    // Kept from the original implementation: abort the request before
                    // the response is closed.
                    request.Abort();
                }
            }
            catch (Exception ex)
            {
                // Same message as before, but the cause is preserved as InnerException.
                throw new Exception("HttpWebPost Error:" + ex.Message, ex);
            }
        }

        /// <summary>
        /// Placeholder that is not called anywhere in this class; it always returns an empty string.
        /// </summary>
        /// <param name="response">Unused.</param>
        /// <returns>An empty string.</returns>
        private string GetGZipStreamString(WebResponse response)
        {
            string result = "";

            return result;
        }

        /// <summary>
        /// Sends a standalone GET request that does not use the shared cookies and
        /// decodes the response as gb2312.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <returns>The response body text.</returns>
        public string Request_Alone(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Credentials = CredentialCache.DefaultCredentials;
            request.ContentType = "application/x-www-form-urlencoded";
            request.AllowAutoRedirect = false;

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                return ReadBody(response, Encoding.GetEncoding("gb2312"), false);
            }
        }

        /// <summary>
        /// Sends a standalone GET request that does not use the shared cookies, with
        /// browser-like Accept and User-Agent headers, and decodes the response as UTF-8.
        /// </summary>
        /// <param name="URL">Address to request.</param>
        /// <returns>The response body text.</returns>
        public string Request_Alone_Keywords(string URL)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
            request.Credentials = CredentialCache.DefaultCredentials;
            request.Accept = "text/html, application/xhtml+xml, */*";
            request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0)";
            request.AllowAutoRedirect = false;

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                return ReadBody(response, Encoding.UTF8, false);
            }
        }

        /// <summary>
        /// Sends a POST or GET request with a fresh, private cookie container (the shared
        /// cookies of this instance are neither sent nor updated) and returns the response text.
        /// </summary>
        /// <param name="request">Prepared request object carrying the headers. Must not be null.</param>
        /// <param name="isPost">true to send a POST, false to send a GET.</param>
        /// <param name="postdata">Data string to send when posting; null is treated as empty.</param>
        /// <param name="encodingName">Encoding name used for the POST data and for decoding the response; "UTF8" selects UTF-8.</param>
        /// <param name="IsGZipString">true to decompress the response stream with GZip before decoding.</param>
        /// <returns>The response body text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
        /// <exception cref="Exception">Any failure while sending or reading; the original error is kept as InnerException.</exception>
        public string Request_Alone_Post(HttpWebRequest request, bool isPost, string postdata = "", string encodingName = "UTF8", bool IsGZipString = false)
        {
            if (request == null)
            {
                throw new ArgumentNullException("request", "HttpWebPost Error:request = NULL");
            }

            try
            {
                request.Method = isPost ? "POST" : "GET";
                request.KeepAlive = true;
                request.AllowAutoRedirect = false;
                // Isolated container: this call must not touch the shared cookies.
                request.CookieContainer = new CookieContainer();

                Encoding encoding = ResolveEncoding(encodingName);
                if (isPost)
                {
                    byte[] payload = encoding.GetBytes(postdata ?? string.Empty);
                    request.ContentLength = payload.Length;
                    WritePostData(request, payload);
                }

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    string body = ReadBody(response, encoding, IsGZipString);

                    // Kept from the original implementation: abort the request before
                    // the response is closed.
                    request.Abort();
                    return body;
                }
            }
            catch (Exception ex)
            {
                throw new Exception("HttpWebPost Error:" + ex.Message, ex);
            }
        }

        /// <summary>
        /// Downloads an image (for example a captcha picture) with a GET request that uses
        /// the shared cookie container.
        /// </summary>
        /// <param name="request">Prepared request object for the image address. Must not be null.</param>
        /// <param name="IsGZipString">true to decompress the response stream with GZip before decoding the image.</param>
        /// <param name="IsAllowAutoRedirect">Whether the request follows redirects.</param>
        /// <returns>The downloaded bitmap; the caller owns it and should dispose it.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
        /// <exception cref="Exception">Any failure while sending or decoding; the original error is kept as InnerException.</exception>
        public Bitmap Request_GetBitmap(HttpWebRequest request, bool IsGZipString = false, bool IsAllowAutoRedirect = false)
        {
            if (request == null)
            {
                throw new ArgumentNullException("request", "HttpWebPost Error:request = NULL");
            }

            try
            {
                request.Method = "GET";
                request.KeepAlive = true;
                request.AllowAutoRedirect = IsAllowAutoRedirect;
                AttachCookieContainer(request);

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    CaptureCookies(request, response);
                    Bitmap bitmap = ReadBitmap(response, IsGZipString);

                    // Kept from the original implementation: abort the request before
                    // the response is closed.
                    request.Abort();
                    return bitmap;
                }
            }
            catch (Exception ex)
            {
                throw new Exception("HttpWebPost Error:" + ex.Message, ex);
            }
        }

        /// <summary>
        /// Maps the encoding name used by the public methods to an Encoding instance.
        /// </summary>
        private static Encoding ResolveEncoding(string encodingName)
        {
            return encodingName == "UTF8" ? Encoding.UTF8 : Encoding.GetEncoding(encodingName);
        }

        /// <summary>
        /// Assigns the shared cookie container to the request, creating it on first use.
        /// </summary>
        private void AttachCookieContainer(HttpWebRequest request)
        {
            if (CookieContainer == null)
            {
                CookieContainer = new CookieContainer();
            }

            request.CookieContainer = CookieContainer;
        }

        /// <summary>
        /// Stores the cookies held for the request URI on the response and on this instance.
        /// </summary>
        private void CaptureCookies(HttpWebRequest request, HttpWebResponse response)
        {
            response.Cookies = request.CookieContainer.GetCookies(request.RequestUri);
            CookiesString = request.CookieContainer.GetCookieHeader(request.RequestUri);
            CookieContainer = request.CookieContainer;
        }

        /// <summary>
        /// Writes the encoded POST payload into the request body and closes the request stream.
        /// </summary>
        private static void WritePostData(HttpWebRequest request, byte[] payload)
        {
            using (Stream body = request.GetRequestStream())
            {
                body.Write(payload, 0, payload.Length);
            }
        }

        /// <summary>
        /// Reads the whole response body as text, optionally decompressing it with GZip first.
        /// </summary>
        private static string ReadBody(HttpWebResponse response, Encoding encoding, bool isGZip)
        {
            using (Stream raw = response.GetResponseStream())
            {
                if (isGZip)
                {
                    using (GZipStream unzipped = new GZipStream(raw, CompressionMode.Decompress))
                    using (StreamReader gzipReader = new StreamReader(unzipped, encoding))
                    {
                        return gzipReader.ReadToEnd();
                    }
                }

                using (StreamReader reader = new StreamReader(raw, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Reads the response body into memory and decodes it as a bitmap, optionally
        /// decompressing it with GZip first.
        /// </summary>
        private static Bitmap ReadBitmap(HttpWebResponse response, bool isGZip)
        {
            // A Bitmap created from a stream needs that stream to stay open for the
            // lifetime of the bitmap, so the image bytes are copied into a MemoryStream
            // that is intentionally left undisposed, while the network stream is closed.
            MemoryStream buffer = new MemoryStream();
            using (Stream raw = response.GetResponseStream())
            {
                if (isGZip)
                {
                    using (GZipStream unzipped = new GZipStream(raw, CompressionMode.Decompress))
                    {
                        unzipped.CopyTo(buffer);
                    }
                }
                else
                {
                    raw.CopyTo(buffer);
                }
            }

            buffer.Position = 0;
            return new Bitmap(buffer);
        }
        #endregion

    }
}