使用 HtmlAgilityPack 解析 HTML(非常好用)

 /// <summary>
    /// Form hosting a WebBrowser control that loads a flight-search page and scrapes its
    /// result table with HtmlAgilityPack.
    /// Designed to be built as a separate exe to work around the WebBrowser control's memory leak.
    /// </summary>
    public partial class MainForm : Form
    {
        /// <summary>
        /// Number of "td/div" cells a table row must contain to be mapped onto a <see cref="RowData"/>.
        /// </summary>
        private const int ExpectedCellCount = 10;

        /// <summary>
        /// Whether the result page has been processed.
        /// </summary>
        // The WebBrowser can only run on the UI thread, so Query polls this flag instead of
        // waiting on a signal. It is volatile because Navigate supports callers on other
        // threads, in which case the flag is written on the UI thread and read elsewhere.
        private volatile bool isCompleted;

        /// <summary>
        /// Rows collected by the most recent query. Query hands out a copy, never this list.
        /// </summary>
        private readonly List<RowData> executeResult = new List<RowData>();

        private static readonly MainForm instance = new MainForm();

        /// <summary>
        /// Singleton instance.
        /// </summary>
        public static MainForm Instance { get { return instance; } }

        private MainForm()
        {
            InitializeComponent();
            webBrowser.DocumentCompleted += webBrowser_DocumentCompleted;
        }

        /// <summary>
        /// Handles the browser's DocumentCompleted event: once the whole page is ready and its
        /// title is "選擇", extracts the rows from the body and marks the query as completed.
        /// </summary>
        private void webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            // The event can fire before the browser as a whole is ready; only act on the final one.
            if (webBrowser.ReadyState != WebBrowserReadyState.Complete)
            {
                return;
            }

            var document = webBrowser.Document;
            if (document == null || document.Body == null)
            {
                return;
            }

            // Only the page with this title is treated as the result page.
            if (document.Title != "選擇")
            {
                return;
            }

            executeResult.AddRange(ExtractData(document.Body.InnerHtml));
            isCompleted = true;
        }

        /// <summary>
        /// Parses the page HTML and maps each row under the element with id "div"
        /// (XPath "tbody/tr", cells "td/div") onto a <see cref="RowData"/>.
        /// </summary>
        /// <param name="html">Inner HTML of the page body; may be null or empty.</param>
        /// <returns>
        /// The extracted rows. Empty when the HTML is empty or the expected element/rows are
        /// missing; rows with fewer than <see cref="ExpectedCellCount"/> cells are skipped.
        /// </returns>
        private List<RowData> ExtractData(string html)
        {
            List<RowData> rows = new List<RowData>();
            if (string.IsNullOrEmpty(html))
            {
                return rows;
            }

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            HtmlAgilityPack.HtmlNode node = doc.GetElementbyId("div");
            if (node == null)
            {
                return rows;
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var trNodes = node.SelectNodes("tbody/tr");
            if (trNodes == null)
            {
                return rows;
            }

            foreach (var trNode in trNodes)
            {
                var cells = trNode.SelectNodes("td/div");
                if (cells == null || cells.Count < ExpectedCellCount)
                {
                    // Not a data row (too few cells); indexing it would throw.
                    continue;
                }

                rows.Add(new RowData
                {
                    航班 = cells[0].InnerText,
                    出發時間 = cells[1].InnerText,
                    到達時間 = cells[2].InnerText,
                    機場 = cells[3].InnerText,
                    機型 = cells[4].InnerText,
                    頭等 = cells[5].InnerText,
                    公務 = cells[6].InnerText,
                    全價 = cells[7].InnerText,
                    折扣 = cells[8].InnerText,
                    特價 = cells[9].InnerText
                });
            }

            return rows;
        }

        /// <summary>
        /// Queries flight data by navigating the browser and waiting until the result page has
        /// been processed or the timeout elapses.
        /// </summary>
        /// <param name="fromCity">Departure city code.</param>
        /// <param name="toCity">Arrival city code.</param>
        /// <param name="date">Departure date.</param>
        /// <param name="timeout">Maximum time to wait for the result page.</param>
        /// <returns>
        /// A new list with the rows collected so far (empty if the page was not processed in
        /// time). The list is a copy, so later queries do not modify it.
        /// </returns>
        [MethodImpl(MethodImplOptions.Synchronized)]
        public List<RowData> Query(string fromCity, string toCity, DateTime date, TimeSpan timeout)
        {
            isCompleted = false;
            executeResult.Clear();

            // NOTE(review): this template is a placeholder without any format items, so the
            // arguments passed to string.Format below are currently ignored. Replace it with
            // the real URL containing {0}..{4} placeholders.
            string urlTemplate = "http://www.xxx.com";
            string url = string.Format(urlTemplate, fromCity, date.Month, date.Day, date.Year, toCity);
            Navigate(url);

            // Stopwatch instead of DateTime.Now: the timeout must not be affected by
            // wall-clock adjustments.
            System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();

            // Wait while the page is unprocessed and the timeout has not elapsed.
            while (!isCompleted && elapsed.Elapsed < timeout)
            {
                Thread.Sleep(100);
                // Pump pending window messages so the browser can progress when this loop
                // runs on the UI thread.
                Application.DoEvents();
            }

            // Return a snapshot: executeResult is cleared and refilled by the next query.
            return new List<RowData>(executeResult);
        }

        /// <summary>
        /// Navigates the browser to the given URL, marshalling the call onto the UI thread
        /// when invoked from another thread.
        /// </summary>
        /// <param name="url">Address to load.</param>
        private void Navigate(string url)
        {
            if (InvokeRequired)
            {
                BeginInvoke(new Action<string>(Navigate), url);
                return;
            }

            webBrowser.Navigate(url);
        }
    }

    /// <summary>
    /// One row of data from the scraped page; each property holds the text of one cell.
    /// The property names are in Chinese; rename them yourself if you prefer otherwise.
    /// </summary>
    public class RowData
    {
        /// <summary>Text of the "flight" column (cell 0 of the row in ExtractData).</summary>
        public string 航班 { get; set; }
        /// <summary>Text of the "departure time" column (cell 1).</summary>
        public string 出發時間 { get; set; }
        /// <summary>Text of the "arrival time" column (cell 2).</summary>
        public string 到達時間 { get; set; }
        /// <summary>Text of the "airport" column (cell 3).</summary>
        public string 機場 { get; set; }
        /// <summary>Text of the "aircraft type" column (cell 4).</summary>
        public string 機型 { get; set; }
        /// <summary>Text of the "first class" column (cell 5).</summary>
        public string 頭等 { get; set; }
        /// <summary>Text of the "business class" column (cell 6).</summary>
        public string 公務 { get; set; }
        /// <summary>Text of the "full price" column (cell 7).</summary>
        public string 全價 { get; set; }
        /// <summary>Text of the "discount" column (cell 8).</summary>
        public string 折扣 { get; set; }
        /// <summary>Text of the "special price" column (cell 9).</summary>
        public string 特價 { get; set; }
    }
相關文章
相關標籤/搜索