直接上代碼(PHP):
<?php

// Remove the script execution time limit: fetching a page and then every
// image referenced in it can take longer than the default limit.
set_time_limit(0);

/**
 * Scrapes a WeChat official-account article page and localises its images.
 *
 * The page is read with file_get_contents(), the title, summary and main
 * content block are extracted with regular expressions, every hotlink-protected
 * image referenced from the content is saved into a local directory, and the
 * content HTML is rewritten to point at the saved copies.
 *
 * Failures are handled on a best-effort basis: nothing is thrown, PHP's own
 * warnings (from file_get_contents(), mkdir(), ...) are left visible, and any
 * part that cannot be obtained is simply left out of the result.
 */
class Gather
{
    /** @var string Address of the article page; anything file_get_contents() can read. */
    private $url;

    /** @var string Directory the downloaded images are written to. */
    private $path;

    /**
     * @param string $url  Address of the article page.
     * @param string $path Directory for downloaded images; created if missing.
     */
    public function __construct($url, $path)
    {
        $this->url = $url;
        $this->path = $path;
    }

    /**
     * Downloads and processes the article.
     *
     * @return array Map with three keys:
     *               - 'title'       string|null  text of the <title> element, null when not found;
     *               - 'description' string|null  value of the page's `msg_desc` script variable, null when not found;
     *               - 'content'     string       trimmed HTML of the `js_content` block with images
     *                                            localised and `data-src` renamed to `src`
     *                                            ('' when the block is not found).
     */
    public function fetch()
    {
        return $this->transform($this->url, $this->path);
    }

    /**
     * Downloads one image and stores it as "<path>/<name>.<type>".
     *
     * The extension is the image MIME type with the "image/" prefix removed
     * (e.g. "jpeg", "png", "gif"). The type is detected from the bytes that
     * were already downloaded, so the image is fetched only once.
     *
     * @param string $url  Address of the image.
     * @param string $path Existing directory to write into.
     * @param string $name File name without extension.
     *
     * @return string|null Path of the written file, or null when the download
     *                     failed, the data is not a recognisable image, or the
     *                     file could not be written.
     */
    private function createPic($url, $path, $name)
    {
        $img = file_get_contents($url);
        if ($img === false || $img === '') {
            return null;
        }

        $info = getimagesizefromstring($img);
        if ($info === false || empty($info['mime'])) {
            // Not an image: do not leave a bogus, extension-less file behind.
            return null;
        }

        $type = str_replace('image/', '', $info['mime']);
        $fileName = $path . DIRECTORY_SEPARATOR . $name . ".$type";

        if (file_put_contents($fileName, $img) === false) {
            return null;
        }

        return $fileName;
    }

    /**
     * Fetches the page at $url and builds the result described on fetch().
     *
     * @param string $url  Address of the article page.
     * @param string $path Directory for downloaded images.
     *
     * @return array See fetch().
     */
    private function transform($url, $path)
    {
        $imageDir = $this->ensureDirectory($path);

        $content = file_get_contents($url);
        if ($content === false) {
            // Keep going with an empty page so the caller still gets the
            // usual array shape; file_get_contents() has already warned.
            $content = '';
        }

        $data = array();
        // Article title.
        $data['title'] = $this->matchFirstGroup('/<title>(.*)<\/title>/i', $content);
        // Official-account article summary.
        $data['description'] = $this->matchFirstGroup('/var\s+msg_desc\s*=\s*"([^\s]*)"/', $content);

        // Main content block of the article.
        $body = (string) $this->matchFirstGroup(
            '/<div\s+class="rich_media_content\s*"\s+id="js_content">(.*?)<\/div>/is',
            $content
        );

        if ($imageDir !== false) {
            $body = $this->localiseImages($body, $imageDir);
        }

        // Turn every remaining data-src attribute into src.
        $body = str_replace('data-src', 'src', $body);

        $data['content'] = trim($body);

        return $data;
    }

    /**
     * Makes sure the image directory exists and resolves it to an absolute path.
     *
     * Missing parent directories are created as well.
     *
     * @param string $path Directory to create or reuse.
     *
     * @return string|false Absolute path of the directory, or false when it
     *                      does not exist and could not be created.
     */
    private function ensureDirectory($path)
    {
        if (!file_exists($path)) {
            mkdir($path, 0777, true);
        }

        // Re-check instead of trusting mkdir()'s result: the directory may have
        // been created concurrently, or $path may name a regular file.
        return is_dir($path) ? realpath($path) : false;
    }

    /**
     * Runs $pattern against $subject and returns the first capture group.
     *
     * @param string $pattern PCRE pattern with at least one capture group.
     * @param string $subject Text to search.
     *
     * @return string|null Captured text, or null when the pattern did not match.
     */
    private function matchFirstGroup($pattern, $subject)
    {
        if (preg_match($pattern, $subject, $match) === 1 && isset($match[1])) {
            return $match[1];
        }

        return null;
    }

    /**
     * Saves every hotlink-protected image referenced in $html into $dir and
     * replaces its URL in the HTML with the path of the saved file.
     *
     * Images that cannot be downloaded keep their original URL.
     *
     * @param string $html Content HTML.
     * @param string $dir  Existing directory to save images into.
     *
     * @return string The rewritten HTML.
     */
    private function localiseImages($html, $dir)
    {
        // Matches data-src="..." attributes and background-image:url(...)
        // declarations that carry a ".../<segment>/<name>/<digits>[?query]" URL.
        // NOTE(review): `[a-zA-z]` and `[mmbiz|mmbiz_jpg|mmbiz_gif]` are character
        // classes, not a letter range / alternation. They are kept byte-for-byte
        // because tightening them would change which URLs are recognised.
        $referencePattern = '/data-src="[a-zA-z]+:\/\/[^\s]*[mmbiz|mmbiz_jpg|mmbiz_gif]\/[^\s]*\/\d*\?([^\s]*=[^\s]*)*"|data-src="[a-zA-z]+:\/\/[^\s]*[mmbiz|mmbiz_jpg|mmbiz_gif]\/[^\s]*\/\d+"|background-image\s*:\s*url\s*\(\s*[a-zA-z]+:\/\/[^\s]*mmbiz\/[^\s]*\/\d+|background-image\s*:\s*url\s*\(\s*[a-zA-z]+:\/\/[^\s]*mmbiz\/[^\s]*\/\d+\?[^\s]*=[^\s]*/is';

        // Pulls the bare URL (whole match) and the path segment used as the
        // local file name (a capture group) out of one matched reference.
        $urlPattern = '/[a-zA-z]+:\/\/[^\s]*[mmbiz|mmbiz_jpg|mmbiz_gif]\/([^\s\/]*)\/\d*\?([^\s]*=[^\s]*)*[^"]|[a-zA-z]+:\/\/[^\s]*[mmbiz|mmbiz_jpg|mmbiz_gif]\/([^\s\/]*)\/\d+/';

        preg_match_all($referencePattern, $html, $references);
        if (empty($references[0])) {
            return $html;
        }

        $attempted = array();    // URLs already tried, so duplicates are fetched once.
        $replacements = array(); // remote URL => local file path

        foreach ($references[0] as $reference) {
            if (preg_match($urlPattern, $reference, $parts) !== 1) {
                continue;
            }

            // Groups belonging to the alternative that did not match are empty
            // strings; dropping them leaves [url, name, ...].
            // array_filter() also drops a literal "0".
            $parts = array_values(array_filter($parts));
            if (count($parts) < 2) {
                continue;
            }

            $imageUrl = $parts[0];
            $name = $parts[1];

            if (isset($attempted[$imageUrl])) {
                continue;
            }
            $attempted[$imageUrl] = true;

            // Save the image locally.
            $localFile = $this->createPic($imageUrl, $dir, $name);
            if ($localFile !== null) {
                $replacements[$imageUrl] = $localFile;
            }
        }

        if (!$replacements) {
            return $html;
        }

        // strtr() substitutes the longest keys first and never rescans its own
        // output, so a URL that is a prefix of another one cannot corrupt it.
        return strtr($html, $replacements);
    }
}