用 curl 請求多個 url,之前都是使用循環來處理。最近發現可以通過 curl_multi_* 系列函數來模擬多線程。對比一下,發現如果請求的 url 只有幾個,2 種方案耗時差不多;但是 url 比較多時,差距就非常明顯了。
先來看下使用 for 循環的方案:
<?php
// Baseline approach: fetch every URL one after another in a plain loop.
// Each curl_exec() call blocks until its request finishes, so the total
// run time is roughly the sum of all individual request times.
$start = microtime(true);

header('Content-type:text/html;charset=utf-8');

// URLs to fetch; the same seven sites are repeated three times so the
// timing difference between the two approaches is easier to see.
$arrs = [
    'https://www.yahoo.com/',
    'http://www.jtthink.com/',
    'https://www.hao123.com/',
    'http://www.cnblogs.com/loveyouyou616/',
    'http://www.qq.com/',
    'http://www.sina.com.cn/',
    'http://www.163.com/',
    'https://www.yahoo.com/',
    'http://www.jtthink.com/',
    'https://www.hao123.com/',
    'http://www.cnblogs.com/loveyouyou616/',
    'http://www.qq.com/',
    'http://www.sina.com.cn/',
    'http://www.163.com/',
    'https://www.yahoo.com/',
    'http://www.jtthink.com/',
    'https://www.hao123.com/',
    'http://www.cnblogs.com/loveyouyou616/',
    'http://www.qq.com/',
    'http://www.sina.com.cn/',
    'http://www.163.com/'
];

// Extra request headers sent with every request.
$headers = array(
    'User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36',
);

// No curl_multi handle is created here: this variant is purely sequential,
// and an unused multi handle would only be leaked.
foreach ($arrs as $url) {
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);   // return the body instead of printing it
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($ch, CURLOPT_HEADER, 0);           // do not include response headers in the body
    curl_setopt($ch, CURLOPT_TIMEOUT, 20);

    // strpos() returns 0 when the URL starts with the needle, and 0 is
    // falsy, so the result must be compared with === 0. A bare
    // `if (strpos(...))` never matches an https URL.
    if (strpos($url, 'https://') === 0) {
        // NOTE(review): disabling peer verification is insecure; it is kept
        // only because this is a timing demo. Do not copy into production.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    }

    $con = curl_exec($ch);   // false on failure, response body otherwise
    curl_close($ch);
    var_dump($con);
}

$end = microtime(true) - $start;

echo '<br/>';
echo $end; // 19.002983093262s on average
接下來使用 curl_multi_* 一次發送多個 url 請求:
<?php
// Concurrent approach: register every request on one curl_multi handle and
// let libcurl drive them together.
// This model issues all URL requests at once, but its drawback is that every
// request must finish before any of the responses can be processed.
$start = microtime(true);

header('Content-type:text/html;charset=utf-8');

// URLs to fetch; the same seven sites are repeated three times so the
// timing difference between the two approaches is easier to see.
$arrs = [
    'https://www.yahoo.com/',
    'http://www.jtthink.com/',
    'https://www.hao123.com/',
    'http://www.cnblogs.com/loveyouyou616/',
    'http://www.qq.com/',
    'http://www.sina.com.cn/',
    'http://www.163.com/',
    'https://www.yahoo.com/',
    'http://www.jtthink.com/',
    'https://www.hao123.com/',
    'http://www.cnblogs.com/loveyouyou616/',
    'http://www.qq.com/',
    'http://www.sina.com.cn/',
    'http://www.163.com/',
    'https://www.yahoo.com/',
    'http://www.jtthink.com/',
    'https://www.hao123.com/',
    'http://www.cnblogs.com/loveyouyou616/',
    'http://www.qq.com/',
    'http://www.sina.com.cn/',
    'http://www.163.com/'
];

// Extra request headers sent with every request.
$headers = array(
    'User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36',
);

$mh = curl_multi_init();
$conn = [];   // easy handles, indexed like $arrs
$res = [];    // response bodies, indexed like $arrs

foreach ($arrs as $i => $url) {
    $conn[$i] = curl_init($url);
    curl_setopt($conn[$i], CURLOPT_RETURNTRANSFER, 1);   // keep the body for curl_multi_getcontent()
    curl_setopt($conn[$i], CURLOPT_HTTPHEADER, $headers);
    curl_setopt($conn[$i], CURLOPT_HEADER, 0);           // do not include response headers in the body
    curl_setopt($conn[$i], CURLOPT_TIMEOUT, 20);

    // strpos() returns 0 when the URL starts with the needle, and 0 is
    // falsy, so the result must be compared with === 0. A bare
    // `if (strpos(...))` never matches an https URL.
    if (strpos($url, 'https://') === 0) {
        // NOTE(review): disabling peer verification is insecure; it is kept
        // only because this is a timing demo. Do not copy into production.
        curl_setopt($conn[$i], CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($conn[$i], CURLOPT_SSL_VERIFYHOST, 2);
    }
    curl_multi_add_handle($mh, $conn[$i]);
}

/*
 * Writing the loop like this easily drives CPU usage to 100%:

    do {
        $n = curl_multi_exec($mh, $active);
    } while ($active);
 *
 */

// Rewritten variant:
/*
    do {
        $mrc = curl_multi_exec($mh, $active);
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);

    while ($active and $mrc == CURLM_OK) {
        if (curl_multi_select($mh) != -1) {
            do {
                $mrc = curl_multi_exec($mh, $active);
            } while ($mrc == CURLM_CALL_MULTI_PERFORM);
        }
    }
*/

// Simplest variant: run the transfers, then block in curl_multi_select()
// until there is activity instead of spinning.
$running = 0;
do {
    $status = curl_multi_exec($mh, $running);
    if ($running > 0 && curl_multi_select($mh) === -1) {
        // select() could not wait on any descriptor; sleep briefly so the
        // loop does not turn into a busy-wait.
        usleep(1000);
    }
} while ($running > 0 && $status === CURLM_OK);

// Collect the response bodies.
foreach ($arrs as $i => $url) {
    $res[$i] = curl_multi_getcontent($conn[$i]);
    var_dump($res[$i]);
    // All HTTP requests have finished by now; append each response to the
    // log file in turn.
    file_put_contents('curl_multi.log', $res[$i]."\r\n\r\n\r\n\r\n", FILE_APPEND);
    // Detach the easy handle from the multi handle before closing it.
    curl_multi_remove_handle($mh, $conn[$i]);
    curl_close($conn[$i]);
}
curl_multi_close($mh);

$end = microtime(true) - $start;

echo '<br/>';
echo $end; // 10.091157913208s on average
執行上面 2 段代碼,可以發現使用 curl_multi_* 系列函數來處理,效率明顯高很多。
但是上面的模型也存在一個問題:返回的時間取決於耗時最長的請求,通俗地說就是結果數據要等全部 http 請求結束後一起返回,然後才能逐個處理數據。