爬蟲軟件開發要用到的代碼

1.時間戳轉爲C#格式時間

        /// <summary>
        /// Converts a Unix timestamp expressed in milliseconds into a local <see cref="DateTime"/>.
        /// </summary>
        /// <param name="timeStamp">
        /// Decimal digits of the number of milliseconds elapsed since 1970-01-01T00:00:00Z.
        /// A null or empty string is treated as zero.
        /// </param>
        /// <returns>The corresponding instant expressed in the machine's local time zone.</returns>
        /// <exception cref="FormatException">The value is not a valid integer.</exception>
        /// <exception cref="OverflowException">The value does not fit in a 64-bit tick count.</exception>
        private DateTime GetTime(string timeStamp)
        {
            // Appending four zeros multiplies the value by 10,000, i.e. turns milliseconds
            // into 100-nanosecond ticks. Parsing is culture-invariant because the input is
            // machine-generated, not user-facing text.
            long ticksSinceEpoch = long.Parse(timeStamp + "0000", System.Globalization.CultureInfo.InvariantCulture);

            // Do the arithmetic in UTC and convert to local time last. Converting the epoch
            // to local time first would apply the UTC offset that was in force in January 1970
            // to every result, which is wrong whenever the target date has a different offset
            // (daylight saving time, or a time-zone rule change).
            DateTime epochUtc = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
            return epochUtc.AddTicks(ticksSinceEpoch).ToLocalTime();
        }

2.獲取驗證碼

 /// <summary>
 /// Downloads a verification-code image and stores the cookie returned with it
 /// in <c>cookieCheckCode</c>.
 /// </summary>
 /// <param name="url">
 /// Address of the image. When null, empty or whitespace, the baihe.com check-code
 /// URL is used with the current tick count appended to make the address unique.
 /// </param>
 /// <returns>The image decoded from the response bytes.</returns>
 public Image GetImg(string url)
 {
     string target = url;
     if (string.IsNullOrWhiteSpace(target))
     {
         target = string.Format("http://ms.baihe.com/checkcode/defaultImageService?0.{0}", DateTime.Now.Ticks);
     }

     // Start from an empty check-code cookie on every request.
     cookieCheckCode = "";

     var request = new HttpItem();
     request.URL = target;
     request.Method = "get";
     request.Expect100Continue = false; // use this setting when going through a proxy
     request.Cookie = _cookie;
     request.Timeout = 100000;
     request.ReadWriteTimeout = 30000;
     request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0";
     request.ContentType = "text/html";
     request.ResultType = ResultType.Byte; // the body is wanted as raw bytes, not text

     var response = http.GetHtml(request);

     string responseCookie = response.Cookie;
     if (responseCookie != null)
     {
         if (responseCookie.StartsWith("PHPSESSID"))
         {
             // Keep only what follows the first comma, dropping the leading PHPSESSID entry.
             cookieCheckCode += responseCookie.Substring(responseCookie.IndexOf(',') + 1);
         }
         else
         {
             cookieCheckCode += responseCookie;
         }
     }

     return byteArrayToImage(response.ResultByte);
 }


        /// <summary>
        /// Decodes an image from its encoded bytes.
        /// </summary>
        /// <param name="Bytes">The encoded image data.</param>
        /// <returns>The decoded image, validated while loading.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="Bytes"/> is null.</exception>
        /// <exception cref="ArgumentException">The data is not a valid image.</exception>
        private Image byteArrayToImage(byte[] Bytes)
        {
            // The stream is deliberately NOT wrapped in a using block: Image.FromStream
            // requires the stream to stay open for the lifetime of the returned image, and
            // disposing it early makes later operations on the image (Save, frame selection,
            // drawing) fail with a generic GDI+ error. A MemoryStream owns no unmanaged
            // resources, so leaving it to the garbage collector together with the image is safe.
            var imageData = new MemoryStream(Bytes);
            return Image.FromStream(imageData, true);
        }

3.正則表達式的使用

 // Extract (uid, nickname) pairs from anchors shaped like
 //   ...&uid=123" class="yellow">nickname</a>
 // The nickname group is lazy (.+?) so that it stops at the first closing </a>.
 // A greedy (.+) would run to the LAST </a> on the line, merging several users
 // that share a line into a single bogus match.
 Regex re = new Regex(@"&uid=(\d+)\"" class=\""yellow\"">(.+?)</a>", RegexOptions.None);
 MatchCollection mc = re.Matches(resulthtml.Html);

 var list = new List<User>();
 foreach (Match match in mc)
 {
     // Group 1 is the numeric uid, group 2 the anchor text.
     list.Add(new User { uid = match.Groups[1].Value, nickname = match.Groups[2].Value });
 }

4.獲取時間

        /// <summary>
        /// Returns the current time as the number of whole milliseconds elapsed since
        /// 1970-01-01 00:00:00 UTC.
        /// </summary>
        /// <returns>Milliseconds since the Unix epoch, truncated to an integer.</returns>
        public long GetTimeLikeJS()
        {
            DateTime unixEpoch = new DateTime(1970, 1, 1);
            TimeSpan sinceEpoch = DateTime.UtcNow.Subtract(unixEpoch);
            double milliseconds = sinceEpoch.TotalMilliseconds;
            return (long)milliseconds;
        }

5.另一線程操作主線程的控件

           // Update the text box through Invoke instead of touching it directly from the
           // worker thread (assumes `this` is a WinForms control or form — confirm).
           Action showLoginSuccess = delegate()
           {
               this.textBox1.Text = "登陸成功";
           };
           this.Invoke(showLoginSuccess);

 .NET 2.0裏的匿名委託

 // C# 2.0 form: an anonymous method without a parameter list, typed as EventHandler,
 // is handed to Invoke to set the button text.
 EventHandler setButtonText = delegate
 {
     button.Text = i.ToString();
 };
 this.Invoke(setButtonText);

另:BackgroundWorker 組件用來執行諸如數據庫事務、文件下載等耗時的異步操作

6.Json對象轉換

 // The payload is a JSON array; only its first element is read. That element
 // carries a "total" value and a "list" of user objects.
 JToken first = JArray.Parse(json)[0];
 var total = first["total"];

 foreach (var entry in first["list"].Children())
 {
     // Round-trip each list entry through its JSON text to build a typed UserInfo.
     UserInfo user = JsonConvert.DeserializeObject<UserInfo>(entry.ToString());
     int age = user.age;
 }

7.趕集網登錄

// Log in to ganji.com with the credentials typed into textBox2 / textBox3, then
// fetch the "my posts" page with the returned cookie and show its HTML in textBox1.
HttpHelper http = new HttpHelper();
HttpItem item = null;
item = new HttpItem()
{
    // The user name and password are percent-encoded so that characters such as
    // '&', '#', '+', '%' or spaces cannot truncate or corrupt the query string.
    // NOTE(review): GetTime() is called without arguments here, so it must be a
    // helper that is not shown in this snippet — confirm which one is intended.
    URL = string.Format(
        "https://passport.ganji.com/login.php?callback=jQuery{0}&username={1}&password={2}",
        GetTime(),
        Uri.EscapeDataString(textBox2.Text),
        Uri.EscapeDataString(textBox3.Text)),
    Referer = "https://passport.ganji.com/login.php?next=/", // referring URL (optional)
};
// This header is the key part: the ganji server only accepts AJAX requests.
item.Header.Add("x-requested-with", "XMLHttpRequest");
HttpResult result = http.GetHtml(item);

// The response may carry no cookie at all; fall back to an empty string instead of
// failing with a NullReferenceException.
string cookie = (result.Cookie ?? string.Empty).Replace("path=/;", "").Replace(",", "%2c");

// After logging in, request the "my posts" page as a test.
item = new HttpItem()
{
    URL = "http://www.ganji.com/vip/my_post_list.php",
    Method = "get", // optional, GET is the default
    Cookie = cookie, // cookie string (optional)
};
result = http.GetHtml(item);
string html = result.Html;

textBox1.Text = html + "\r\n";

 

8.C# Unicode編碼/解碼

http://www.cnblogs.com/Rolends/archive/2011/09/22/2185276.html

// If a POSTed JSON body arrives garbled, or the server cannot read it, send the
// body as UTF-8 bytes (PostdataByte + PostDataType.Byte) rather than as a string.
var item = new HttpItem();
item.URL = url;                    // required, e.g. "http://159.142.15.196:8089/api/Users/Post_ErpUsers"
item.Method = "post";              // HTTP method (optional, defaults to GET)
item.IsToLower = false;            // whether the returned HTML is lower-cased (optional, lower-cased by default)
item.Cookie = "";                  // cookie string (optional)
item.Referer = "";                 // referring URL (optional)
// Postdata is intentionally not set; the string form (e.g. Postdata = json) is what
// this byte-based variant replaces.
item.Timeout = 100000;             // connection timeout (optional, defaults to 100000)
item.ReadWriteTimeout = 30000;     // timeout for writing the POST data (optional, defaults to 30000)
item.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"; // browser identification (optional, has a default)
item.ContentType = "application/json"; // content type of the request (optional, has a default)
item.Allowautoredirect = false;    // whether to follow 301 redirects (optional)
item.PostdataByte = Encoding.UTF8.GetBytes(json);
item.PostDataType = PostDataType.Byte;

 

9.C#解析Html組件

組件名稱:HtmlAgilityPack

xpath獲取元素:"//*[@class=\"dc-intro\"]/ul/li"  ,查找class="dc-intro"下ul下的li

            // Load the software detail page and pull its fields out with XPath.
            // NOTE(review): every expression below starts with "//", which in XPath selects
            // from the document root rather than from `node`; use ".//" if the search is
            // meant to be limited to the #form2 element — confirm intent.
            HtmlWeb web = new HtmlWeb();
            var doc = web.Load(string.Format("{0}/soft/{1}.html", baseUrl, id));
            var node = doc.GetElementbyId("form2");
            var titleNode = node.SelectSingleNode("//*[@class=\"rr-title dc-title clearfix\"]/h1");
            string title = titleNode.InnerText;

            // Run the intro query once and index into the result instead of re-evaluating
            // the same XPath for every field.
            var introItems = node.SelectNodes("//*[@class=\"dc-intro\"]/ul/li");
            string kfyy = introItems[0].LastChild.InnerText; // development language
            string slsj = introItems[1].LastChild.InnerText; // date listed
            string sqxy = introItems[2].LastChild.InnerText; // license
            string czxt = introItems[3].LastChild.InnerText; // supported operating systems
            string rjpjHtml = introItems[4].InnerHtml;       // software rating markup

            // Rating level = number of occurrences of the literal file name. The name is
            // escaped so that '.' matches only a dot rather than any character.
            var level = Regex.Matches(rjpjHtml, Regex.Escape("xx01.png")).Count;

            string content = node.SelectSingleNode("//*[@class=\"markdown-body entry-content\"]").InnerHtml;

            // href of the first and second links inside the dc-info element.
            var infoLinks = node.SelectNodes("//*[@class=\"dc-info\"]/a");
            string rjsy = infoLinks[0].Attributes["href"].Value;
            string rjxz = infoLinks[1].Attributes["href"].Value;

 10.上傳文件

        /// <summary>
        /// Uploads the file c:\test.txt as a hand-built multipart/form-data POST, sending
        /// the cookie read from bbs_cookie.txt. The body holds three parts: "uid", "hash"
        /// and the file itself under the field name "Filedata".
        /// </summary>
        private static void UploadFile()
        {
            HttpHelper Http = new HttpHelper();

            string cookieStr = File.ReadAllText(@"E:\下載\UploadImageFile\UploadImageFile\bbs_cookie.txt");

            // Boundary taken from a captured browser request.
            string boundary = "----WebKitFormBoundarynp7wXmB7Ntr1BcsX";
            string partDelimiter = "--" + boundary + "\r\n";

            // Everything that precedes the raw file content: the two plain form fields
            // followed by the headers of the file part.
            string head = string.Concat(
                partDelimiter,
                @"Content-Disposition: form-data; name=""uid""", "\r\n\r\n1\r\n",
                partDelimiter,
                @"Content-Disposition: form-data; name=""hash""", "\r\n\r\n",
                "dd865aaa4760a2715e5c5660754f7a7f",
                "\r\n", partDelimiter,
                @"Content-Disposition: form-data; name=""Filedata""; filename=""test.txt""", "\r\n",
                "Content-Type: text/plain\r\n\r\n");
            byte[] headBytes = Encoding.ASCII.GetBytes(head);

            // Raw content of the file being uploaded.
            byte[] fileBytes = File.ReadAllBytes(@"c:\test.txt");

            // Closing delimiter that ends the multipart body.
            byte[] tailBytes = Encoding.ASCII.GetBytes("\r\n--" + boundary + "--\r\n");

            // head + file + tail
            byte[] payload = ComposeArrays(ComposeArrays(headBytes, fileBytes), tailBytes);

            var request = new HttpItem
            {
                URL = "http://www.xxx.com/misc.php?mod=swfupload&operation=upload&simple=1",
                Method = "POST",
                ContentType = "multipart/form-data; boundary=" + boundary,
                PostDataType = PostDataType.Byte,
                PostEncoding = Encoding.UTF8,
                PostdataByte = payload,
                ResultType = ResultType.String,
                Cookie = cookieStr
            };
            var response = Http.GetHtml(request);

            // The response text is read but not used further here.
            var responseHtml = response.Html;
        }

        /// <summary>
        /// Concatenates two byte arrays into a newly allocated array.
        /// </summary>
        /// <param name="Array1">Bytes placed first.</param>
        /// <param name="Array2">Bytes placed after <paramref name="Array1"/>.</param>
        /// <returns>A new array holding the content of both inputs in order.</returns>
        public static byte[] ComposeArrays(byte[] Array1, byte[] Array2)
        {
            int firstLength = Array1.Length;
            int secondLength = Array2.Length;

            byte[] combined = new byte[firstLength + secondLength];
            Buffer.BlockCopy(Array1, 0, combined, 0, firstLength);
            Buffer.BlockCopy(Array2, 0, combined, firstLength, secondLength);
            return combined;
        }
相關文章
相關標籤/搜索