1. 簡單的爬蟲
1. xampp開啓php的cURL -> 打開 xampp/php/php.ini,搜索 curl,去掉該行(extension=php_curl.dll)開頭的分號便可
2. cmd D: -> cd xampp -> cd phpweb
php -f ../htdocs/mirror/curl/curl.php > ../htdocs/mirror/curl/haha.html (將爬下來的數據保存在haha.html中)
// ../htdocs/mirror/curl/curl.php
<?php
// Minimal crawler: fetch the Baidu home page and print it with every
// occurrence of "百度" replaced by "屌絲".
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.baidu.com");
// Hand the response body back as a string instead of printing it directly,
// so it can be post-processed before output.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
$output = curl_exec($ch);

if ($output === false) {
    // curl_exec() returns false on failure; report the reason instead of
    // silently emitting an empty page.
    echo "Curl error " . curl_error($ch);
} else {
    // Replace "百度" with "屌絲" in the fetched page before printing it.
    echo str_replace("百度", "屌絲", $output);
}

curl_close($ch);
?>
2. 訪問WebService
http://www.webxml.com.cn/WebServices/WeatherWebService.asmx?op=getWeatherbyCityName
<?php
// Call the WeatherWebService "getWeatherbyCityName" operation with an HTTP
// POST and print the raw response (or the cURL error on failure).

// http_build_query() percent-encodes the non-ASCII city name so the body is
// valid application/x-www-form-urlencoded data.
$data = http_build_query(['theCityName' => '武漢']);

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.webxml.com.cn/WebServices/WeatherWebService.asmx/getWeatherbyCityName");
// Do not include the response headers in the returned string.
curl_setopt($ch, CURLOPT_HEADER, false);
// CURLOPT_USERAGENT expects the header value as a string; the previous
// integer 1 was sent literally as "User-Agent: 1".
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; PHP cURL)');
// Return the response body as a string instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line. Content-Length is
// left to cURL, which derives it from CURLOPT_POSTFIELDS.
curl_setopt($ch, CURLOPT_HTTPHEADER, [
    'Content-Type: application/x-www-form-urlencoded; charset=utf-8',
]);

$rtn = curl_exec($ch);
if ($rtn === false) {
    echo "Curl error " . curl_error($ch);
} else {
    echo $rtn;
}

curl_close($ch);
?>
3. 模擬登錄後下載登錄以後的信息
<?php
// Simulated login: POST the credentials to the login URL, keep the cookies
// the server sets, then fetch a page that requires the logged-in session
// using the same handle and print it.

// http_build_query() percent-encodes "@" and the non-ASCII characters so the
// body is valid application/x-www-form-urlencoded data.
// NOTE(review): credentials are hard-coded here; consider loading them from
// the environment or a config file outside version control.
$data = http_build_query([
    'username' => '252973202@qq.com',
    'password' => '這個密碼就不寫出來啦',
    'remember' => 1,
]);

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.imooc.com/user/login");
// Return the response body as a string instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

// --- Simulated login: start ---
// Cookies carry expiry times, so fix the timezone used for date handling.
date_default_timezone_set('PRC');

// Boilerplate cookie handling used by most simulated logins.
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
// Read cookies from, and write them back to, the same file.
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookiefile");
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookiefile");
// session_name() must be called, not quoted: the quoted form sent the
// literal text "session_name()" as the cookie name.
// NOTE(review): session_id() is empty unless session_start() has run, and a
// local PHP session id is presumably meaningless to the remote site; confirm
// whether this cookie is needed at all.
curl_setopt($ch, CURLOPT_COOKIE, session_name() . "=" . session_id());
// Do not include the response headers in the returned string.
curl_setopt($ch, CURLOPT_HEADER, false);
// CURLOPT_USERAGENT expects the header value as a string; the previous
// integer 1 was sent literally as "User-Agent: 1".
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (compatible; PHP cURL)');
// Follow HTTP redirects issued by the server.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line. Content-Length is
// left to cURL, which derives it from CURLOPT_POSTFIELDS.
curl_setopt($ch, CURLOPT_HTTPHEADER, [
    'Content-Type: application/x-www-form-urlencoded; charset=utf-8',
]);

$rtn = curl_exec($ch);
if ($rtn === false) {
    // Without a successful login request the second fetch is pointless.
    echo "Curl error " . curl_error($ch);
    curl_close($ch);
    exit(1);
}
// --- Simulated login: end ---

// After logging in, fetch a page that is only available to the logged-in user.
curl_setopt($ch, CURLOPT_URL, "http://www.imooc.com/space/index");
// Explicitly switch the reused handle back from POST to GET.
curl_setopt($ch, CURLOPT_HTTPGET, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, ["Content-type:text/xml"]);

$output = curl_exec($ch);
if ($output === false) {
    echo "Curl error " . curl_error($ch);
} else {
    echo $output;
}

curl_close($ch);
?>