---恢復內容開始---
因為工作需要,需要做一個小程序,實現網站監控:當瀏覽器瀏覽到目標網站的時候,自動爬取數據。由於 HTTPS 存在證書驗證,需要用別的方式來處理,暫時還沒研究,但必須保證這類網站仍能正常訪問。
官方的Demo,我在這個基礎上進行了修改:http://fiddler.wikidot.com/fiddlercore-demo
這邊我來介紹下我的Demo。
首先要去下載FiddlerCore4.dll,百度上有很多。
先上代碼:
主界面代碼:
using System;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using System.Threading;
using Fiddler;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace FiddlerDemo
{
    /// <summary>
    /// Console entry point: installs a console control handler so the proxy is shut
    /// down when the console is interrupted or closed, then hands control to
    /// <see cref="FiddlerHelp.StartSession"/>.
    /// </summary>
    class Program
    {
        /// <summary>Proxy endpoint disposed on shutdown when it is non-null.</summary>
        public static Proxy oProxy;

        #region Clean shutdown when the console is closed unexpectedly

        /// <summary>Signature of the callback registered with SetConsoleCtrlHandler.</summary>
        public delegate bool ControlCtrlDelegate(int CtrlType);

        [DllImport("kernel32.dll")]
        private static extern bool SetConsoleCtrlHandler(ControlCtrlDelegate HandlerRoutine, bool Add);

        // Held in a static field so the delegate handed to native code stays referenced.
        private static ControlCtrlDelegate cancelHandler = new ControlCtrlDelegate(HandlerRoutine);

        /// <summary>
        /// Console control callback. Shuts the proxy down for control type 0 (Ctrl+C)
        /// and control type 2 (console close button); every other type is ignored.
        /// </summary>
        /// <param name="CtrlType">Control signal code passed in by the console.</param>
        /// <returns>Always <c>false</c>.</returns>
        public static bool HandlerRoutine(int CtrlType)
        {
            bool isCtrlC = CtrlType == 0;
            bool isConsoleClose = CtrlType == 2;

            if (isCtrlC || isConsoleClose)
            {
                DoQuit();
            }

            return false;
        }

        #endregion

        /// <summary>Registers the console control handler and starts the capture session.</summary>
        static void Main(string[] args)
        {
            SetConsoleCtrlHandler(cancelHandler, true);

            FiddlerHelp helper = new FiddlerHelp();
            helper.StartSession();
        }

        /// <summary>Cancel-key handler that simply triggers the shutdown sequence.</summary>
        static void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            DoQuit();
        }

        /// <summary>
        /// Announces the shutdown, disposes <see cref="oProxy"/> if one exists, shuts
        /// FiddlerCore down and then waits half a second.
        /// </summary>
        public static void DoQuit()
        {
            WriteCommandResponse("Shutting down...");

            if (oProxy != null)
            {
                oProxy.Dispose();
            }

            Fiddler.FiddlerApplication.Shutdown();
            Thread.Sleep(500);
        }

        /// <summary>Writes <paramref name="s"/> in yellow, then restores the previous colour.</summary>
        /// <param name="s">Text to print on its own line.</param>
        public static void WriteCommandResponse(string s)
        {
            ConsoleColor previousColor = Console.ForegroundColor;

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine(s);

            Console.ForegroundColor = previousColor;
        }
    }
}
處理類:
using Fiddler;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Xml;

namespace FiddlerDemo
{
    /// <summary>
    /// One watch rule read from WatchHtml.xml: a URL fragment to look for and the
    /// regular expression applied to the body of matching responses.
    /// </summary>
    public class HtmlConfig
    {
        /// <summary>Substring that a session's full URL must contain for the rule to apply.</summary>
        public string sWeb { get; set; }

        /// <summary>Regular expression run against the response body of matching sessions.</summary>
        public string sRegex { get; set; }
    }

    /// <summary>
    /// Hosts FiddlerCore as a local proxy, prints the regex match for every response
    /// whose URL matches a configured rule, and runs an interactive console command loop.
    /// </summary>
    public class FiddlerHelp
    {
        /// <summary>Proxy endpoint disposed on shutdown when it is non-null.</summary>
        public static Proxy oProxy;

        /// <summary>Watch rules loaded by <see cref="LoadHtmlConfig"/>.</summary>
        public static List<HtmlConfig> dicHtml = new List<HtmlConfig> { };

        /// <summary>
        /// Loads the watch rules, wires the FiddlerCore event handlers, starts the proxy
        /// on port 8877 and then blocks in the console command loop until 'Q' is pressed.
        /// </summary>
        public void StartSession()
        {
            LoadHtmlConfig();

            // Display name for this FiddlerCore host.
            Fiddler.FiddlerApplication.SetAppDisplayName("FiddlerCoreDemoApp");

            // Shared between the FiddlerCore callbacks and the console loop;
            // every access goes through lock (oAllSessions).
            List<Fiddler.Session> oAllSessions = new List<Fiddler.Session>();

            Fiddler.FiddlerApplication.OnNotification += delegate(object sender, NotificationEventArgs oNEA)
            {
                Console.WriteLine("** NotifyUser: " + oNEA.NotifyString);
            };

            Fiddler.FiddlerApplication.BeforeRequest += delegate(Fiddler.Session oS)
            {
                // Buffering must be enabled so the complete response is available in the
                // BeforeResponse handler instead of being streamed to the client as it arrives.
                oS.bBufferResponse = true;

                lock (oAllSessions)
                {
                    oAllSessions.Add(oS);
                }
            };

            Fiddler.FiddlerApplication.BeforeResponse += delegate(Fiddler.Session oS)
            {
                // Decode the response so the body can be read as text below.
                oS.utilDecodeResponse();

                foreach (var item in dicHtml)
                {
                    if (!oS.fullUrl.Contains(item.sWeb))
                    {
                        continue;
                    }

                    Console.WriteLine("{0}:HTTP {1} for {2}", oS.id, oS.responseCode, oS.fullUrl);

                    string sHtmlBody = oS.GetResponseBodyAsString();
                    if (!string.IsNullOrEmpty(sHtmlBody))
                    {
                        Console.Write("獲取的內容爲:" + MatchRegex(sHtmlBody, item.sRegex) + "\n");
                    }
                }

                // The session was already recorded in BeforeRequest. Adding it again here
                // would count every session twice, so it is deliberately not re-added.
            };

            Fiddler.FiddlerApplication.AfterSessionComplete += delegate(Fiddler.Session oS)
            {
                int iCount;
                lock (oAllSessions)
                {
                    iCount = oAllSessions.Count;

                    // Drop the recorded sessions once more than 1000 have accumulated.
                    if (iCount > 1000)
                    {
                        oAllSessions.Clear();
                    }
                }

                Console.Title = ("Session list contains: " + iCount.ToString() + " sessions");
            };

            Console.CancelKeyPress += new ConsoleCancelEventHandler(Console_CancelKeyPress);

            FiddlerApplication.Prefs.SetBoolPref("fiddler.network.streaming.abortifclientaborts", true);

            // Startup configuration.
            Fiddler.CONFIG.IgnoreServerCertErrors = false;
            int iPort = 8877;
            Fiddler.FiddlerApplication.Startup(iPort, true, false, true);

            bool bDone = false;

            #region Console commands
            do
            {
                Console.WriteLine("\nEnter a command [C=Clear; L=List; G=Collect Garbage; W=write SAZ; R=read SAZ;\n\tS=Toggle Forgetful Streaming; T=Trust Root Certificate; Q=Quit]:>");
                ConsoleKeyInfo cki = Console.ReadKey();
                Console.WriteLine();

                // 'L' is advertised in the prompt but has no handler in this demo.
                switch (Char.ToLower(cki.KeyChar))
                {
                    case 'c':
                        lock (oAllSessions)
                        {
                            oAllSessions.Clear();
                        }
                        WriteCommandResponse("Clear...");
                        FiddlerApplication.Log.LogString("Cleared session list.");
                        break;

                    case 'd':
                        FiddlerApplication.Log.LogString("FiddlerApplication::Shutdown.");
                        FiddlerApplication.Shutdown();
                        break;

                    case 'g':
                        Console.WriteLine("Working Set:\t" + Environment.WorkingSet.ToString("n0"));
                        Console.WriteLine("Begin GC...");
                        GC.Collect();
                        Console.WriteLine("GC Done.\nWorking Set:\t" + Environment.WorkingSet.ToString("n0"));
                        break;

                    case 'q':
                        bDone = true;
                        DoQuit();
                        break;

                    case 'r':
                        WriteCommandResponse("This demo was compiled without SAZ_SUPPORT defined");
                        break;

                    case 'w':
                        WriteCommandResponse("This demo was compiled without SAZ_SUPPORT defined");
                        break;

                    case 't':
                        try
                        {
                            WriteCommandResponse("Result: " + Fiddler.CertMaker.trustRootCert().ToString());
                        }
                        catch (Exception eX)
                        {
                            WriteCommandResponse("Failed: " + eX.ToString());
                        }
                        break;

                    // Forgetful streaming
                    case 's':
                        bool bForgetful = !FiddlerApplication.Prefs.GetBoolPref("fiddler.network.streaming.ForgetStreamedData", false);
                        FiddlerApplication.Prefs.SetBoolPref("fiddler.network.streaming.ForgetStreamedData", bForgetful);
                        Console.WriteLine(bForgetful ? "FiddlerCore will immediately dump streaming response data." : "FiddlerCore will keep a copy of streamed response data.");
                        break;
                }
            } while (!bDone);
            #endregion
        }

        /// <summary>
        /// Reads the watched sites and their extraction regexes from WatchHtml.xml and
        /// appends them to <see cref="dicHtml"/>. Every child element of the
        /// <c>Root</c> element contributes one rule via its <c>Web</c> and
        /// <c>Regex</c> attributes.
        /// </summary>
        /// <exception cref="XmlException">
        /// The file is not well-formed XML or has no <c>Root</c> element.
        /// </exception>
        /// <remarks>I/O errors from opening the file (for example a missing file) propagate to the caller.</remarks>
        private void LoadHtmlConfig()
        {
            // Prefer the copy two directories up (the location used when debugging);
            // otherwise fall back to the file in the current directory.
            string sPath = File.Exists(@"..\..\WatchHtml.xml")
                ? @"..\..\WatchHtml.xml"
                : @"WatchHtml.xml";

            XmlDocument xmlDoc = new XmlDocument();
            // Without this call the document is empty and the Root lookup below returns null.
            xmlDoc.Load(sPath);

            XmlNode xn = xmlDoc.SelectSingleNode("Root");
            if (xn == null)
            {
                throw new XmlException("WatchHtml.xml must contain a <Root> element holding the watch rules.");
            }

            // OfType skips comments and other non-element nodes that a plain cast would choke on.
            foreach (XmlElement xe in xn.ChildNodes.OfType<XmlElement>())
            {
                dicHtml.Add(new HtmlConfig
                {
                    sWeb = xe.GetAttribute("Web"),
                    sRegex = xe.GetAttribute("Regex")
                });
            }
        }

        /// <summary>Cancel-key handler that triggers the shutdown sequence.</summary>
        public void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            DoQuit();
        }

        /// <summary>
        /// Quits: disposes <see cref="oProxy"/> if one exists, shuts FiddlerCore down
        /// and then waits half a second.
        /// </summary>
        public void DoQuit()
        {
            if (null != oProxy)
            {
                oProxy.Dispose();
            }

            Fiddler.FiddlerApplication.Shutdown();
            Thread.Sleep(500);
        }

        /// <summary>Writes <paramref name="s"/> in yellow, then restores the previous colour.</summary>
        /// <param name="s">Text to print on its own line.</param>
        public static void WriteCommandResponse(string s)
        {
            ConsoleColor oldColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine(s);
            Console.ForegroundColor = oldColor;
        }

        /// <summary>
        /// Returns the first match of <paramref name="sRegex"/> in the page text, after
        /// replacing CR and LF with spaces and trimming the result.
        /// </summary>
        /// <param name="sHtml">HTML page content that was captured.</param>
        /// <param name="sRegex">Regular expression pattern (matched case-insensitively).</param>
        /// <returns>
        /// The matched text; an empty string when nothing matches; <c>null</c> when
        /// <paramref name="sHtml"/> is null or empty, or when the pattern is null or invalid.
        /// </returns>
        public static string MatchRegex(string sHtml, string sRegex)
        {
            if (string.IsNullOrEmpty(sHtml))
            {
                return null;
            }

            try
            {
                string sFlattened = sHtml.Replace('\r', ' ').Replace('\n', ' ').Trim();
                Match result = Regex.Match(sFlattened, sRegex, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                return result.Success ? result.Value : string.Empty;
            }
            catch (ArgumentException)
            {
                // Null or malformed pattern from the config file: keep the
                // best-effort contract and report "no result" to the caller.
                return null;
            }
        }
    }
}
XML文件內容(根節點為 Root,每個子節點帶 Web 和 Regex 兩個屬性):
實現效果:
---恢復內容結束---