// Download the page at `url` and return its text decoded with `encode`.
// Result contract, as implemented below:
//   - body text      : the response status was 200 OK and the body was read
//   - string.Empty   : a response arrived with a status other than OK (logged)
//   - null           : any exception was caught while requesting or reading (logged)
string html = string.Empty;
try
{
    // NOTE(review): the `as` casts yield null for a non-HTTP request/response type;
    // the resulting NullReferenceException would land in the outer catch and return null.
    HttpWebRequest request = HttpWebRequest.Create(url) as HttpWebRequest; // build the simulated request
    request.Timeout = 30 * 1000; // 30-second timeout (value is in milliseconds)
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36";
    request.ContentType = "text/html; charset=utf-8";
    using (HttpWebResponse response = request.GetResponse() as HttpWebResponse) // send the request
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            log.Error("抓取{0}地址返回失敗,響應狀態爲{1}", url, response.StatusCode);
        }
        else
        {
            try
            {
                // `using` disposes the reader even when ReadToEnd throws;
                // previously Close() was only reached on the success path.
                using (StreamReader sr = new StreamReader(response.GetResponseStream(), encode))
                {
                    html = sr.ReadToEnd(); // read the whole body
                }
            }
            catch (Exception ex)
            {
                log.Error("抓取{0}失敗", url, ex);
                html = null;
            }
        }
    }
}
catch (Exception ex)
{
    log.Error("抓取{0}出現異常", url, ex);
    html = null;
}
return html;
// Scrape one listing page (sites named by the author: Lianjia, Zhongyuan, Anjuke, 5i5j).
// Only the Lianjia selectors are active in this code.
string html = HttpHelper.DownloadUrl(pageurl);
if (html == null)
{
    // Nothing was downloaded; return the list unchanged.
    return houseList;
}

HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(html);

// Lianjia: select every <li> entry of the result list.
string psht = @"//*[@class='content']/div[@class='leftContent']/ul[@class='listContent']/li[@class='clear xiaoquListItem']";
HtmlNodeCollection noneNodeList = doc.DocumentNode.SelectNodes(psht);
if (noneNodeList == null)
{
    // The code treats a null result as "no listing nodes on this page".
    log.ErrorAsync("數據爲空!");
    return houseList;
}

foreach (var item in noneNodeList)
{
    // Each entry is loaded into its own document; the "//" queries below
    // therefore run against that document only.
    HtmlDocument docChild = new HtmlDocument();
    docChild.LoadHtml(item.OuterHtml);

    // Lianjia: community name link, price span and sell-count span.
    HtmlNode titleNode = docChild.DocumentNode.SelectSingleNode(
        @"//*[@class='info']/div[@class='title']/a");
    HtmlNode priceNode = docChild.DocumentNode.SelectSingleNode(
        @"//*[@class='xiaoquListItemPrice']/div[@class='totalPrice']/span");
    HtmlNode sellCountNode = docChild.DocumentNode.SelectSingleNode(
        @"//*[@class='xiaoquListItemRight']/div[@class='xiaoquListItemSellCount']/a[@class='totalSellCount']/span");

    // A node that was not found is recorded as the literal text "null".
    // The title previously had no such guard and threw on a missing node.
    TrojanHorse house = new TrojanHorse();
    house.title = titleNode == null ? "null" : titleNode.InnerText;
    house.price = priceNode == null ? "null" : priceNode.InnerText;
    house.remark = sellCountNode == null ? "null" : sellCountNode.InnerText;
    houseList.Add(house);
}
}
/// <summary>
/// Downloads one listing page and extracts the entries found on it.
/// </summary>
/// <param name="pageurl">URL of the listing page to download and parse.</param>
/// <returns>
/// The entries parsed from the page. The list is returned empty when the
/// download yields null or when no listing nodes match the selector.
/// </returns>
private static List<TrojanHorse> GetTrojanHorseList(string pageurl)
{
    List<TrojanHorse> houseList = new List<TrojanHorse>();
    try
    {
        string html = HttpHelper.DownloadUrl(pageurl);
        if (html == null)
        {
            // Nothing was downloaded; return the empty list.
            return houseList;
        }

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(html);

        // Lianjia: select every <li> entry of the result list.
        string psht = @"//*[@class='content']/div[@class='leftContent']/ul[@class='listContent']/li[@class='clear xiaoquListItem']";
        HtmlNodeCollection noneNodeList = doc.DocumentNode.SelectNodes(psht);
        if (noneNodeList == null)
        {
            // The code treats a null result as "no listing nodes on this page".
            log.ErrorAsync("數據爲空!");
            return houseList;
        }

        foreach (var item in noneNodeList)
        {
            // Each entry is loaded into its own document; the "//" queries
            // below therefore run against that document only.
            HtmlDocument docChild = new HtmlDocument();
            docChild.LoadHtml(item.OuterHtml);

            // Lianjia: community name link, price span and sell-count span.
            HtmlNode titleNode = docChild.DocumentNode.SelectSingleNode(
                @"//*[@class='info']/div[@class='title']/a");
            HtmlNode priceNode = docChild.DocumentNode.SelectSingleNode(
                @"//*[@class='xiaoquListItemPrice']/div[@class='totalPrice']/span");
            HtmlNode sellCountNode = docChild.DocumentNode.SelectSingleNode(
                @"//*[@class='xiaoquListItemRight']/div[@class='xiaoquListItemSellCount']/a[@class='totalSellCount']/span");

            // A node that was not found is recorded as the literal text "null".
            // The title previously had no such guard, so one entry without a
            // title link threw and aborted the rest of the page.
            TrojanHorse house = new TrojanHorse();
            house.title = titleNode == null ? "null" : titleNode.InnerText;
            house.price = priceNode == null ? "null" : priceNode.InnerText;
            house.remark = sellCountNode == null ? "null" : sellCountNode.InnerText;
            houseList.Add(house);
        }
    }
#region
// {
// string html = HttpHelper.DownloadUrl(pageurl);
// if (html == null)
// {
// return houseList;
// }
// HtmlDocument doc = new HtmlDocument();
// doc.LoadHtml(html);
// //*[@id="pebpwbwege"]
// //鏈家
// //string psht = @"//*[@class='leftContent']/ul[@class='listContent']/li[@class='clear xiaoquListItem']";
// //中原
// string psht = @"//*[@class='section-wrap section-houselists']/div[@class='section']/div[@class='house-item clearfix']";
// HtmlNodeCollection noneNodeList = doc.DocumentNode.SelectNodes(psht);
// if (noneNodeList == null)
// {
// log.ErrorAsync("數據爲空!");
// return houseList;
// }
// foreach (var item in noneNodeList)
// {
// TrojanHorse house = new TrojanHorse();
// HtmlDocument docChild = new HtmlDocument();
// docChild.LoadHtml(item.OuterHtml);
// //docChild.LoadHtml();
// //鏈家
// //string urlPath = @"//*[@class='info']/div[@class='title']/a";
// //中原
// string urlPath = @"//*[@class='item-info fl']/h4/a";
// HtmlNode urlNode = docChild.DocumentNode.SelectSingleNode(urlPath);
// //docChild.DocumentNode.SelectSingleNode(str);
// string tsct = urlNode.InnerText;//小區名稱
// //鏈家
// //string strs = @"//*[@class='xiaoquListItemRight']/div[@class='xiaoquListItemPrice']/div[@class='totalPrice']/span";
// //中原
// string strs = @"//*[@class='item-pricearea fr']/p[@class='tc f666 f12 mt_10']/a";
// HtmlNode urlNodes = docChild.DocumentNode.SelectSingleNode(strs);
// string s = "";
// if (urlNodes == null)
// {
// s = "null";
// }
// else
// {
// s = urlNodes.InnerText;
// }
// //鏈家
// //string strst = @"//*[@class='xiaoquListItemRight']/div[@class='xiaoquListItemSellCount']/a[@class='totalSellCount']/span";
// //中原
// string strst = @"//*[@class='item-pricearea fr']/p[@class='price-nub cRed tc']/span";
// HtmlNode urlNodest = docChild.DocumentNode.SelectSingleNode(strst);
// string st = "";
// if (urlNodest == null)
// {
// st = "null";
// }
// else
// {
// st = urlNodest.InnerText;
// }
// //string tscts = s.Replace(" ", "");
// //string tsctst = tscts.Substring(0, 8);
// //string tsctsb = tscts.Substring(tscts.Length - 7, 7);
// // string rsf = s;
// //string zf = tsctsb.Substring(0, 5);//租房
// house.title = tsct;
// house.price = s;
// house.remark = st;
// houseList.Add(house);
// //house.StaffName = urlNode.Attributes["title"].Value; // agent name
// //string companyPath = "//*[@class='jjr-info']/p[@class='jjr-desc mg-top']/a[position()<2]";
// //HtmlNode companyNode = docChild.DocumentNode.SelectSingleNode(companyPath);
// //if (companyNode == null)
// //{
// // continue;
// //}
// //house.Company = companyNode.InnerText; // agency
// //string telPath = "//*[@class='jjr-side']";
// //HtmlNode telNode = docChild.DocumentNode.SelectSingleNode(telPath);
// //if (telNode == null)
// //{
// // continue;
// //}
// //string telstr = telNode.InnerText.Trim();
// //house.Mobile = telstr; // agent phone number
// //house.CityCode = citycode; // city code
// //house.CreateTime = DateTime.Now;
// //var flag = houseList.Where(x => x.Mobile == telstr).FirstOrDefault(); // skip entries whose phone number is already listed
// //if (flag == null)
// //{
// // houseList.Add(house);
// //}
// }
//}
#endregion
// Handler for the try block above. It had been swallowed into the preceding
// comment line together with the #endregion, leaving the try without a handler.
catch (Exception ex)
{
    log.ErrorAsync("服務器異常,異常信息:" + ex.Message);
}