自己做Fiddler,實現http網站的抓取

---恢復內容開始---

因爲工作需要,需要做一個小程序,實現網站監控:當瀏覽器瀏覽到目標網站的時候,自動爬取數據。由於https存在證書驗證,需要用別的方式來處理,暫時還沒研究,但必須保證https網站仍能正常訪問。

官方的Demo,我在這個基礎上進行了修改:http://fiddler.wikidot.com/fiddlercore-demo

這邊我來介紹下我的Demo。

首先要去下載FiddlerCore4.dll,百度上很多。

先上代碼:

主界面代碼

using System;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using System.Threading;
using Fiddler;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace FiddlerDemo
{
    /// <summary>
    /// Console entry point. Installs a native console-control handler so the
    /// shutdown routine runs when the console is interrupted or closed, then
    /// hands control to <c>FiddlerHelp.StartSession</c>.
    /// </summary>
    class Program
    {
        /// <summary>Proxy instance disposed during shutdown when it is non-null.</summary>
        public static Proxy oProxy;

        #region Keep the machine unaffected when the console is closed abnormally
        /// <summary>Signature of the callback passed to <c>SetConsoleCtrlHandler</c>.</summary>
        public delegate bool ControlCtrlDelegate(int CtrlType);

        [DllImport("kernel32.dll")]
        private static extern bool SetConsoleCtrlHandler(ControlCtrlDelegate HandlerRoutine, bool Add);

        // Held in a static field so the delegate passed to native code stays referenced.
        private static ControlCtrlDelegate cancelHandler = HandlerRoutine;

        /// <summary>
        /// Console-control callback: runs <see cref="DoQuit"/> for control type 0
        /// (Ctrl+C) and control type 2 (console close button).
        /// </summary>
        /// <param name="CtrlType">Numeric control type supplied by the console.</param>
        /// <returns>Always <c>false</c>.</returns>
        public static bool HandlerRoutine(int CtrlType)
        {
            bool isCtrlC = CtrlType == 0;
            bool isCloseButton = CtrlType == 2;
            if (isCtrlC || isCloseButton)
            {
                DoQuit();
            }

            return false;
        }
        #endregion

        /// <summary>Registers the console-control handler and starts the capture session.</summary>
        /// <param name="args">Command-line arguments (not read).</param>
        static void Main(string[] args)
        {
            SetConsoleCtrlHandler(cancelHandler, true);

            FiddlerHelp helper = new FiddlerHelp();
            helper.StartSession();
        }

        /// <summary>Handler shaped for <c>Console.CancelKeyPress</c>; shuts the application down.</summary>
        static void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            DoQuit();
        }

        /// <summary>
        /// Prints a shutdown notice, disposes <see cref="oProxy"/> when set, shuts
        /// FiddlerCore down and then sleeps for 500 ms.
        /// </summary>
        public static void DoQuit()
        {
            WriteCommandResponse("Shutting down...");

            if (oProxy != null)
            {
                oProxy.Dispose();
            }

            Fiddler.FiddlerApplication.Shutdown();
            Thread.Sleep(500);
        }

        /// <summary>Writes a line in yellow, then restores the previous foreground colour.</summary>
        /// <param name="s">Text to print.</param>
        public static void WriteCommandResponse(string s)
        {
            ConsoleColor previous = Console.ForegroundColor;

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine(s);

            Console.ForegroundColor = previous;
        }
    }
}

處理類:

using Fiddler;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Configuration;
using System.Xml;


namespace FiddlerDemo
{
    /// <summary>
    /// One watch rule: a URL fragment to look for and the regular expression
    /// applied to the body of responses whose URL contains that fragment.
    /// </summary>
    public class HtmlConfig
    {
        /// <summary>Text that a response's full URL must contain for the rule to apply.</summary>
        public string sWeb
        {
            get;
            set;
        }

        /// <summary>Regular-expression pattern run against the response body.</summary>
        public string sRegex
        {
            get;
            set;
        }
    }
    /// <summary>
    /// Runs a FiddlerCore proxy, prints the regex match for every response whose
    /// URL contains one of the configured fragments, and offers a small
    /// interactive console command loop.
    /// </summary>
    public class FiddlerHelp
    {
        /// <summary>Proxy instance disposed by <see cref="DoQuit"/> when it is non-null.</summary>
        public static Proxy oProxy;

        /// <summary>Watch rules loaded from WatchHtml.xml by <see cref="LoadHtmlConfig"/>.</summary>
        public static List<HtmlConfig> dicHtml = new List<HtmlConfig> { };

        /// <summary>
        /// Loads the watch rules, wires the FiddlerCore event handlers, starts the
        /// proxy on port 8877 and then blocks in the interactive command loop
        /// until the user quits.
        /// </summary>
        public void StartSession()
        {
            LoadHtmlConfig();

            // Set the display name of this FiddlerCore host.
            Fiddler.FiddlerApplication.SetAppDisplayName("FiddlerCoreDemoApp");

            List<Fiddler.Session> oAllSessions = new List<Fiddler.Session>();

            Fiddler.FiddlerApplication.OnNotification += delegate(object sender, NotificationEventArgs oNEA)
            {
                Console.WriteLine("** NotifyUser: " + oNEA.NotifyString);
            };

            Fiddler.FiddlerApplication.BeforeRequest += delegate(Fiddler.Session oS)
            {
                // To allow response tampering, buffering mode must be enabled. This lets
                // FiddlerCore permit modification of the response in the BeforeResponse
                // handler instead of streaming it to the client as it arrives.
                oS.bBufferResponse = true;

                // The list is touched from event callbacks and from the command loop,
                // so every access is guarded by the same lock.
                lock (oAllSessions)
                {
                    oAllSessions.Add(oS);
                }
            };

            // FiddlerApplication.OnReadResponseBuffer is intentionally not hooked: the raw
            // buffer is not decompressed and still contains headers, so it is rarely useful.
            Fiddler.FiddlerApplication.BeforeResponse += delegate(Fiddler.Session oS)
            {
                // Decode the response so the body can be read as text.
                oS.utilDecodeResponse();

                foreach (var item in dicHtml)
                {
                    if (!oS.fullUrl.Contains(item.sWeb))
                    {
                        continue;
                    }

                    Console.WriteLine("{0}:HTTP {1} for {2}", oS.id, oS.responseCode, oS.fullUrl);

                    string sHtmlBody = oS.GetResponseBodyAsString();
                    if (!string.IsNullOrEmpty(sHtmlBody))
                    {
                        Console.Write("獲取的內容爲:" + MatchRegex(sHtmlBody, item.sRegex) + "\n");
                    }
                }

                // The session was already recorded in BeforeRequest; adding it again here
                // would store every session twice and double the reported count.
            };

            Fiddler.FiddlerApplication.AfterSessionComplete += delegate(Fiddler.Session oS)
            {
                int iCount;
                lock (oAllSessions)
                {
                    iCount = oAllSessions.Count;

                    // Once more than 1000 sessions are held, drop them all.
                    if (iCount > 1000)
                    {
                        oAllSessions.Clear();
                    }
                }

                Console.Title = ("Session list contains: " + iCount.ToString() + " sessions");
            };

            Console.CancelKeyPress += new ConsoleCancelEventHandler(Console_CancelKeyPress);

            FiddlerApplication.Prefs.SetBoolPref("fiddler.network.streaming.abortifclientaborts", true);

            // Startup configuration.
            Fiddler.CONFIG.IgnoreServerCertErrors = false;
            int iPort = 8877;

            // NOTE(review): the three flags are presumably (register as system proxy,
            // decrypt SSL, allow remote clients) - confirm against the FiddlerCore API.
            Fiddler.FiddlerApplication.Startup(iPort, true, false, true);

            RunCommandLoop(oAllSessions);
        }

        /// <summary>
        /// Reads single-key commands from the console until 'q' is pressed.
        /// </summary>
        /// <param name="oAllSessions">Shared session list; also used as the lock object.</param>
        private void RunCommandLoop(List<Fiddler.Session> oAllSessions)
        {
            bool bDone = false;

            #region Interactive commands
            do
            {
                // The prompt still advertises L=List, but no 'l' command is implemented.
                Console.WriteLine("\nEnter a command [C=Clear; L=List; G=Collect Garbage; W=write SAZ; R=read SAZ;\n\tS=Toggle Forgetful Streaming; T=Trust Root Certificate; Q=Quit]:>");
                ConsoleKeyInfo cki = Console.ReadKey();
                Console.WriteLine();

                switch (Char.ToLower(cki.KeyChar))
                {
                    case 'c':
                        lock (oAllSessions)
                        {
                            oAllSessions.Clear();
                        }
                        WriteCommandResponse("Clear...");
                        FiddlerApplication.Log.LogString("Cleared session list.");
                        break;

                    case 'd':
                        FiddlerApplication.Log.LogString("FiddlerApplication::Shutdown.");
                        FiddlerApplication.Shutdown();
                        break;

                    case 'g':
                        Console.WriteLine("Working Set:\t" + Environment.WorkingSet.ToString("n0"));
                        Console.WriteLine("Begin GC...");
                        GC.Collect();
                        Console.WriteLine("GC Done.\nWorking Set:\t" + Environment.WorkingSet.ToString("n0"));
                        break;

                    case 'q':
                        bDone = true;
                        DoQuit();
                        break;

                    case 'r':
                        WriteCommandResponse("This demo was compiled without SAZ_SUPPORT defined");
                        break;

                    case 'w':
                        WriteCommandResponse("This demo was compiled without SAZ_SUPPORT defined");
                        break;

                    case 't':
                        try
                        {
                            WriteCommandResponse("Result: " + Fiddler.CertMaker.trustRootCert().ToString());
                        }
                        catch (Exception eX)
                        {
                            WriteCommandResponse("Failed: " + eX.ToString());
                        }
                        break;

                    // Forgetful streaming
                    case 's':
                        bool bForgetful = !FiddlerApplication.Prefs.GetBoolPref("fiddler.network.streaming.ForgetStreamedData", false);
                        FiddlerApplication.Prefs.SetBoolPref("fiddler.network.streaming.ForgetStreamedData", bForgetful);
                        Console.WriteLine(bForgetful
                            ? "FiddlerCore will immediately dump streaming response data."
                            : "FiddlerCore will keep a copy of streamed response data.");
                        break;
                }
            } while (!bDone);
            #endregion
        }

        /// <summary>
        /// Reads the watched sites and their extraction regular expressions from
        /// WatchHtml.xml and appends them to <see cref="dicHtml"/>.
        /// </summary>
        /// <remarks>
        /// The file must have a <c>Root</c> element; each child element supplies a
        /// <c>Web</c> and a <c>Regex</c> attribute. Child nodes that are not
        /// elements (comments, whitespace) are ignored.
        /// </remarks>
        /// <exception cref="XmlException">The document has no <c>Root</c> element or is malformed.</exception>
        private void LoadHtmlConfig()
        {
            // Prefer the copy two directories up (debugging layout); otherwise use
            // the copy in the working directory (build output layout).
            string sPath = System.IO.File.Exists(@"..\..\WatchHtml.xml")
                ? @"..\..\WatchHtml.xml"
                : @"WatchHtml.xml";

            XmlDocument xmlDoc = new XmlDocument();
            // Without this call the document is empty and no Root node can be found.
            xmlDoc.Load(sPath);

            XmlNode xn = xmlDoc.SelectSingleNode("Root");
            if (xn == null)
            {
                throw new XmlException("WatchHtml.xml does not contain a <Root> element.");
            }

            foreach (XmlNode item in xn.ChildNodes)
            {
                XmlElement xe = item as XmlElement;
                if (xe == null)
                {
                    continue;
                }

                HtmlConfig htmlConfig = new HtmlConfig
                {
                    sWeb = xe.GetAttribute("Web"),
                    sRegex = xe.GetAttribute("Regex")
                };
                dicHtml.Add(htmlConfig);
            }
        }

        /// <summary>Console cancel-key handler: shuts the application down.</summary>
        public void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            DoQuit();
        }

        /// <summary>
        /// Quits: disposes <see cref="oProxy"/> when set, shuts FiddlerCore down and
        /// then sleeps for 500 ms.
        /// </summary>
        public void DoQuit()
        {
            if (null != oProxy)
            {
                oProxy.Dispose();
            }

            Fiddler.FiddlerApplication.Shutdown();
            Thread.Sleep(500);
        }

        /// <summary>Writes a line in yellow, then restores the previous foreground colour.</summary>
        /// <param name="s">Text to print.</param>
        public static void WriteCommandResponse(string s)
        {
            ConsoleColor oldColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine(s);
            Console.ForegroundColor = oldColor;
        }

        /// <summary>
        /// Returns the first match of <paramref name="sRegex"/> in the page text,
        /// after replacing CR and LF with spaces and trimming.
        /// </summary>
        /// <param name="sHtml">The HTML page that was retrieved.</param>
        /// <param name="sRegex">Regular-expression pattern (matched case-insensitively).</param>
        /// <returns>
        /// The matched text; an empty string when nothing matches; <c>null</c> when
        /// <paramref name="sHtml"/> is null or empty, or when the pattern is null or invalid.
        /// </returns>
        public static string MatchRegex(string sHtml, string sRegex)
        {
            if (string.IsNullOrEmpty(sHtml))
            {
                return null;
            }

            try
            {
                string sFlattened = sHtml.Replace('\r', ' ').Replace('\n', ' ').Trim();
                Match result = Regex.Match(sFlattened, sRegex, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                return result.Success ? result.Value : string.Empty;
            }
            catch (ArgumentException)
            {
                // A null or malformed pattern from the config file must not break the
                // proxy callback; callers treat null as "no result".
                return null;
            }
        }
    }
}

XML文件內容(WatchHtml.xml):根節點爲Root,每個子節點帶有Web和Regex兩個屬性,分別表示要監聽的網站和提取數據用的正則表達式。

實現效果

 

 

 

 

 

 

---恢復內容結束---

相關文章
相關標籤/搜索