利用chrome插件批量讀取瀏覽器頁面內容並寫入數據庫

  試想一下,如果每天要收集100頁網頁數據甚至更多,採用人工收集會吐血,用程序去收集也就成爲不二的選擇。首先肯定會想到用Java、PHP、C#等高級語言,但目標網站偏偏又需要登錄和驗證碼,讓人無所適從。還在爲收集web端的數據感到苦惱嗎?很高興,你找對地方了。

  應用場景:

    一、需要每天大量重複收集web端的數據

    二、web頁面數據需要登錄後才能採集

    三、web頁面存在翻頁

  解決方案:

    手工登錄,然後採用chrome插件的方式進行收集。當然你會說用selenium等自動化測試的方法進行收集更cool,而且可以每天自動收集,完全自動化、不用人工參與。但是作爲chrome的忠實腦殘粉,再者只需要前端的js、服務器端的接收文件和數據庫就可以完美解決這一問題,再加上部署和操作簡單——腦殘粉總有很多理由的嘛。好吧,就算是一種憋屈的曲線救國的實現方式吧。

  思路:

    

幫助手冊:http://open.chrome.360.cn/extension_dev/overview.html

實例:

抓取某電商後臺訂單數據

一、建立一個項目文件夾並引入所需文件:如D:\tool\chrome_server_plugin

  jquery-2.1.1.min.js、icon.png

二、建立background.html

<!-- Empty placeholder page: it loads no scripts and renders nothing. The manifest in this tutorial declares its background code through "background.scripts", not through this page. -->
<html><head>
</head></html>

三、建立配置文件manifest.json文件

{

"name": "獲取某電商後臺訂單信息",
"version": "1.0",
"manifest_version": 2,
"description": "*********獲取某電商後臺訂單信息*********",
"browser_action": {
"default_icon": "icon.png"
},
"permissions": [
"webNavigation",
"tabs",
"contextMenus",
"http://服務器接受數據url/"
],
"background": {
"scripts": ["eventPage.js","jquery-2.1.1.min.js"]
},
"content_scripts": [
{
"matches": ["http://抓取頁面url/*"],
"js": ["jquery-2.1.1.min.js", "contentscript.js"]
}
]
}

四、建立前端js文件contentscript.js

// Pagination state shared with getOrderInfo(), which overwrites both values
// every time it runs.
var totalPage;
var page = 0;

// Start-signal listener. The background page sends a message when the toolbar
// button is clicked and again after each completed navigation while a run is
// active; every such message triggers one scrape of the current page.
chrome.extension.onMessage.addListener(
  function(request, sender, sendResponse) {
    // The background page also sends {"cmd": "end"} back after a successful
    // upload. That is an acknowledgement, not a start signal: scraping on it
    // would upload the same page again, be acknowledged again, and loop.
    if (request && request.cmd === "end") {
      return;
    }
    totalPage = $("input[name=totalPage]").val();
    console.log("totalPage----------" + totalPage);
    getOrderInfo( sendResponse );
  });

/**
 * Scrapes every order listed on the current page, hands the rows to sendMsg(),
 * and clicks through to the next page while one remains.
 *
 * Each order becomes one string of 11 fields joined by "@_@", in this order:
 * shop id, order id, review state, payment amount, order time, payment type,
 * shipping fee, order status, buyer account, remark, express number.
 * The per-field lists are collected independently and matched up by position,
 * so the n-th entry of every list is assumed to belong to the n-th order link.
 * A value that is missing for an order is emitted as an empty field.
 *
 * Side effects: overwrites the file-level `page` and `totalPage` variables,
 * calls sendMsg() exactly once, and may click the "next page" link.
 *
 * @param {Function} sendResponse Reply callback handed over by the message
 *     listener. Accepted for interface compatibility; it is never invoked.
 */
function getOrderInfo( sendResponse ){
  var FIELD_SEPARATOR = "@_@";
  // Upper bound on the page number after which the run stops regardless of
  // the total reported by the page.
  var MAX_PAGE = 100;

  // Returns the part of `text` that follows the first occurrence of `label`,
  // or null when the label does not occur.
  function textAfterLabel(text, label) {
    var at = text.indexOf(label);
    return at > -1 ? text.slice(at + label.length) : null;
  }

  // Removes each line-break character that is directly followed by spaces,
  // together with those spaces. Other whitespace is left untouched.
  function stripLineBreakIndent(text) {
    return text.replace(/[\r\n]\ +/g, "");
  }

  // Payment amount and order time, both read from the spans of the head rows.
  var payMoney = [];
  var orderTime = [];
  $("tr[class=head] span").each(function(){
    var spanText = $(this).text();
    var money = textAfterLabel(spanText, '貨款金額:');
    if (money !== null) {
      payMoney.push(money);
      return;
    }
    var time = textAfterLabel(spanText, '下單時間:');
    if (time !== null) {
      orderTime.push(time);
    }
  });

  // Payment type and shipping fee, both read from the same cell.
  var paytype = [];
  var yunfei = [];
  $("td[class=p-values]").each(function(){
    var cellText = $(this).text();
    paytype.push(cellText.indexOf('貨到付款') > -1 ? '貨到付款' : '在線支付');
    var fee = textAfterLabel(cellText, '運費:');
    yunfei.push(fee === null ? 0 : fee);
  });

  // Order status, buyer account and remark. The matched cells are treated as
  // groups of five; positions 1, 2 and 3 of each group are kept.
  var orderStatus = [];
  var users = [];
  var remark = [];
  $("tr[class=content] td[class=t-c]").each(function(index){
    var cellText = stripLineBreakIndent($(this).text());
    switch (index % 5) {
      case 1:
        orderStatus.push(cellText);
        break;
      case 2:
        users.push(cellText);
        break;
      case 3:
        remark.push(cellText);
        break;
    }
  });

  // Express (waybill) number.
  var express = [];
  $("tr[class=content] td div[style='text-align: center;']").each(function(){
    express.push(stripLineBreakIndent($(this).text()));
  });

  var orderInfo = [];
  $("tr[class=head] a[track=orderinfopagebeta]").each(function(index){
    var orderid = $(this).text();

    // Review state is derived from the inline style of the order's comment
    // link. A missing link or style yields an empty value instead of a crash.
    var mycomment = stripLineBreakIndent($("a[id=comment_" + orderid + "]").attr('style') || '');
    // Compare without whitespace so "display: none;" and "display:none;" are
    // recognised alike; any other style text is passed through unchanged.
    var compactStyle = mycomment.replace(/\s+/g, '');
    if ('display:block;' === compactStyle) {
      mycomment = '已評價';
    } else if ('display:none;' === compactStyle) {
      mycomment = '未評價';
    }

    // The shop id is whatever follows the first comma in the remark icon's
    // onclick attribute, minus closing parenthesis and semicolon characters.
    var onclickText = $("img[id=remarkFlag_" + orderid + "]").attr('onclick') || '';
    var shopid = onclickText.slice(onclickText.indexOf(",") + 1).replace(/[\)\;]/g, "");

    // Array.prototype.join renders undefined entries as empty strings, so a
    // list that is shorter than the order list produces an empty field.
    var orderdesc = [
      shopid,
      orderid,
      mycomment,
      payMoney[index],
      orderTime[index],
      paytype[index],
      yunfei[index],
      orderStatus[index],
      users[index],
      remark[index],
      express[index]
    ].join(FIELD_SEPARATOR);
    console.log("---------orderdesc-------" + orderdesc);
    orderInfo.push(orderdesc);
  });

  page = parseInt($("a[class=current]").text(), 10);
  totalPage = parseInt($("input[name=totalPage]").val(), 10);
  console.log(page + "--page-----------totalPage---" + totalPage);

  // The second "a.next" match is the one clicked.
  // NOTE(review): presumably the first match belongs to another pager on the
  // page - confirm against the target markup.
  var nextLink = $('a.next')[1];
  var morePages = page < totalPage && page < MAX_PAGE;
  if (morePages && nextLink) {
    console.log("---------next-------");
    sendMsg( orderInfo, "next" );
    nextLink.click();
  } else {
    if (morePages) {
      // Reporting "next" without being able to navigate would leave the
      // background page waiting for a page load that never comes.
      console.warn("next-page link not found; ending the run at page " + page);
    }
    console.log("---------end-------");
    sendMsg( orderInfo, "end" );
  }
}

/**
 * Forwards one page of scraped rows to the extension's background page.
 *
 * @param {Array} msg Rows collected from the current page.
 * @param {string} cmd "next" when another page follows, "end" when the run is over.
 */
function sendMsg( msg, cmd){
  var payload = {};
  payload.msg = msg;
  payload.cmd = cmd;
  // No reply is expected; the callback is deliberately a no-op.
  var ignoreReply = function(response) {};
  chrome.extension.sendMessage(payload, ignoreReply);
}

五、建立後臺處理js文件eventPage.js

// True while a scraping run is active. The message listener in this file
// resets it when the content script reports "end".
var flag = false;
// Id of the tab in which the run was started.
var currentTabId;

// Toolbar button click: arm the run and send the first start signal to the
// tab the button was clicked in.
chrome.browserAction.onClicked.addListener(function(tab) {
  console.log('Turning ' + tab.url);
  flag = true;
  currentTabId = tab.id;
  // onClicked already supplies the tab, so the deprecated
  // chrome.tabs.getSelected() round trip is not needed.
  sendMsg(tab.id);
});


// Page-load hook: while a run is active, re-send the start signal so that the
// page reached through the "next page" click is scraped as well.
chrome.webNavigation.onCompleted.addListener(function( details ){
  console.log('加載完成***' + details.tabId );
  // onCompleted fires for every frame of every tab. Only the top-level frame
  // (frameId 0) of the tab being scraped may restart the scrape; otherwise
  // each embedded frame, or an unrelated tab, would trigger a duplicate run.
  if( flag && details.frameId === 0 && details.tabId === currentTabId ){
    sendMsg( details.tabId );
  }
});

// Receives each page of scraped rows from the content script and posts it to
// the server as one "orderinfo" field, rows joined with "#$#".
chrome.extension.onMessage.addListener(

function(request, sender, sendResponse) {
  console.log("*******evenPage.js***chrome.extension.onMessage.addListener");

  // The last page arrives with cmd "end": disarm the run so that later page
  // loads no longer restart the scrape.
  if ('end' === request.cmd) {
    flag = false;
  }

  // A message without rows has nothing to upload; calling join() on it would
  // throw, and posting an empty field would only produce a junk row.
  var rows = request.msg;
  if (!Array.isArray(rows) || rows.length === 0) {
    console.log('no order rows in message, upload skipped');
    return;
  }

  $.ajax({
    // The file name is case-sensitive on most servers and must match the
    // receiver script, which the tutorial names getOrderInfo.php.
    url: "服務器接受數據URL/getOrderInfo.php",
    cache: false,
    type: "POST",
    data: {'orderinfo': rows.join("#$#")},
    dataType: "json"
  }).done(function(msg) {
    console.log('*******************json*************' + (msg && msg.sql) );
    // Acknowledge the upload to the tab the run was started in.
    chrome.tabs.sendMessage(currentTabId, {"cmd":"end"},
      function(response) {
        console.log(response);
      });
  }).fail(function(jqXHR, textStatus) {
    // Report the failure instead of discarding it; with dataType "json" a
    // response body that is not valid JSON also ends up here.
    console.log('order upload failed: ' + textStatus);
  });

});

/**
 * Sends {"cmd": "ok", "sku": colores} to the active tab of the current window
 * and logs whatever that tab replies.
 *
 * @param {*} colores Value forwarded unchanged as the message's "sku" field.
 */
function sendSku2Info(colores){
  chrome.tabs.query({active: true, currentWindow: true}, function(tabs) {
    // The query may match no tab at all; reading tabs[0].id would then throw.
    if (!tabs || tabs.length === 0) {
      return;
    }
    chrome.tabs.sendMessage(tabs[0].id, {"cmd":"ok", "sku": colores},
      function(response) {
        console.log(response);
      });
  });
}

/**
 * Sends the start signal to the content script running in the given tab.
 *
 * @param {number} tabid Id of the tab to signal.
 */
function sendMsg( tabid ){
  var startSignal = {greeting: "start working"};
  // No reply is used; the callback is deliberately a no-op.
  var ignoreReply = function(response) {
  };
  console.log(tabid + "--sendMsg()----eventPage.js");
  chrome.tabs.sendMessage(tabid, startSignal, ignoreReply);
}

 

六、建立服務器接收文件getOrderInfo.php(放在服務器哦,親!)

<?php
/*
 * Receives the order rows posted by the extension's background page in the
 * `orderinfo` POST field and upserts them into the `test` table.
 *
 * Wire format: rows are joined with "#$#"; the 11 fields of one row are joined
 * with "@_@" in the column order of the INSERT statement below.
 * The response is a plain-text/HTML diagnostic report.
 */

header("Content-type:text/html; charset=utf-8");

// Number of "@_@"-separated fields that make up one complete order row.
$fieldCount = 11;

if (!isset($_POST['orderinfo']) || !is_string($_POST['orderinfo']) || $_POST['orderinfo'] === '') {
  http_response_code(400);
  die('No order data received');
}

// mysqli replaces the mysql_* functions, which were removed in PHP 7.
// Error reporting is switched to return values so that the checks below work
// the same way on PHP versions where mysqli throws exceptions by default.
mysqli_report(MYSQLI_REPORT_OFF);
$con = mysqli_connect("localhost", "root", "root", "test");
if (!$con)
{
  die('Could not connect: ' . mysqli_connect_error());
}
// The table is declared with CHARSET=utf8 and the posted text is UTF-8.
mysqli_set_charset($con, "utf8");

// Values are bound as parameters instead of being concatenated into the SQL
// text: the posted data is untrusted and a quote in a remark must not be able
// to alter the statement.
$sql = "INSERT INTO test(venderId, orderid, pingjia, money, ordertime, paytype, yunfei, orderstatu, user, remark, express) VALUES "
     . "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) "
     . "ON DUPLICATE KEY UPDATE remark = VALUES(remark), pingjia = VALUES(pingjia), "
     . "orderstatu = VALUES(orderstatu), express = VALUES(express)";
$stmt = mysqli_prepare($con, $sql);
if (!$stmt)
{
  $prepareError = mysqli_error($con);
  mysqli_close($con);
  die('Could not prepare statement: ' . $prepareError);
}

$saved = 0;
$skipped = 0;
foreach (explode('#$#', $_POST['orderinfo']) as $myorder) {
  $value = explode('@_@', $myorder);
  // A row with a different number of fields cannot be mapped onto the columns.
  if (count($value) !== $fieldCount) {
    $skipped++;
    continue;
  }

  // Every value is bound as a string; MySQL converts it to the column type.
  mysqli_stmt_bind_param(
    $stmt,
    str_repeat('s', $fieldCount),
    $value[0], $value[1], $value[2], $value[3], $value[4], $value[5],
    $value[6], $value[7], $value[8], $value[9], $value[10]
  );

  if (mysqli_stmt_execute($stmt)) {
    $saved++;
    // Escaped because the response is served as text/html.
    echo "===========" . htmlspecialchars($value[10], ENT_QUOTES, 'UTF-8') . "<br>";
  } else {
    $skipped++;
    echo "Insert failed: " . htmlspecialchars(mysqli_stmt_error($stmt), ENT_QUOTES, 'UTF-8') . "<br>";
  }
}

echo "saved: " . $saved . ", skipped: " . $skipped . "<br>";

mysqli_stmt_close($stmt);
mysqli_close($con);

?>

七、建立數據庫表

-- Order table filled by getOrderInfo.php: one row per scraped order.
-- The INSERT in that script sets the eleven columns venderId .. express;
-- shop_id, shop_name and stattime are not written by it and keep their defaults.
CREATE TABLE `test` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT 'id',
`venderId` int(10) NOT NULL DEFAULT '0' COMMENT '商家ID',
`orderid` bigint(20) NOT NULL DEFAULT '0' COMMENT '訂單ID',
`pingjia` varchar(100) NOT NULL DEFAULT '' COMMENT '訂單發出後的狀態(是否評價)',
`money` decimal(10,2) NOT NULL DEFAULT '0.00' COMMENT '訂單金額',
`ordertime` varchar(100) NOT NULL DEFAULT '' COMMENT '下單時間',
`paytype` varchar(100) NOT NULL DEFAULT '' COMMENT '付款方式',
`yunfei` decimal(10,2) NOT NULL DEFAULT '0.00' COMMENT '運費',
`orderstatu` varchar(100) NOT NULL DEFAULT '' COMMENT '訂單狀態',
`user` varchar(255) NOT NULL DEFAULT '' COMMENT '訂單用戶',
`remark` varchar(255) NOT NULL DEFAULT '' COMMENT '備註',
`express` varchar(255) NOT NULL DEFAULT '' COMMENT '物流方式和運單號',
`shop_id` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '店鋪表ID',
`shop_name` varchar(255) NOT NULL DEFAULT '' COMMENT '店鋪名稱',
`stattime` int(11) NOT NULL DEFAULT '0' COMMENT '下單年月日',
PRIMARY KEY (`id`),
-- One row per order id. Re-posting an order hits this key, which is what makes
-- the script's ON DUPLICATE KEY UPDATE clause update the row instead of adding one.
UNIQUE KEY `orderid` (`orderid`)
) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT=''

注意:各文件中紅色部分(原文以紅色標出,如服務器接收數據的URL、抓取頁面的URL等佔位內容)需要手工修改;此外如果修改了js文件名,請在manifest.json中同步修改相應的配置信息。

插件加載步驟:

  

示例代碼:https://github.com/jackgitgz/chrome_server_plugin

此示例僅供參考,有理解不到位或錯誤的還請指出。

相關文章
相關標籤/搜索