爲了應對當前HTML格式和js腳本對老人機的不友好,咱們須要處理一下某些小說網站,讓它回到十幾年前的樣子:剔除大量CSS和JS,精簡HTML標籤,這樣就可以讓老人機快速、友好地看小說了。
目前下面的腳本就能實現了,能夠爲老人機提供簡單的瀏覽和必備的搜索功能,而且支持緩存。
_remoteurl_:填寫連接
_band_word_:刪除關鍵詞
_band_href_:刪除URL包含該值的連接
_band_ele_:把某些沒有替換掉的元素刪除掉,這個設置的存在是由一個正則匹配引發的,通常不要修改。
關於緩存:在源碼內搜索 $keeptime 便可找到相關代碼。
在define處修改remoteurl和其餘配置,注意看註釋,別亂來。
找到 if ($_GET['search']) : 這一行,下面有
$html = c_get('https://search2.booktxt.net/s.php', 'post', 't=1&keyword='.urlencode(_iconv($_GET['search'], 'GBK')));
這樣一行,修改c_get的參數便可。
第一個參數是url,第二個是方式(post/get),第三個是傳輸過去的請求字符串,可能要用到轉碼功能,具體看對面頁面的編碼,用_iconv(腳本自構的函數)轉換。具體寫什麼須要手動抓取網站信息手動填好。
例子:
# define _remoteurl_:
define('_remoteurl_', 'https://www.biqutxt.com/');
# $_GET['search'] 後邊:
$html = c_get('https://www.biqutxt.com/modules/article/search.php', 'post', 'searchtype=articlename&action=login&searchkey='.urlencode(_iconv($_GET['search'], 'GBK')));
若出現亂碼,則可能爲轉碼問題,也可能爲原網頁的問題,需排查。能夠把URL上面的page參數進行urldecode+base64decode,即獲得源URL。
搬上完整代碼👇:
<?php
/**
 * Minimal novel-site proxy for feature phones.
 *
 * Fetches the configured remote home page, a search result, or the URL that
 * is carried base64-encoded in the `page` query parameter; strips scripts,
 * styles and most tags; rewrites links so they point back at this script;
 * and caches the rendered output under ./cachepage/.
 *
 * author: foxnes/luuljh
 */
error_reporting(E_ALL ^ E_WARNING ^ E_NOTICE);
date_default_timezone_set("PRC");

define('_remoteurl_', 'https://m.booktxt.net/'); // must start with http(s):// and end with /
define('_band_word_', '字體:|頂點小說移動網(m\.booktxt\.net)|37小說網'); // regex alternatives separated by |, must not be empty
define('_band_href_', 'cnzz\.com|mybook\.php'); // same format as above
define('_band_ele_', 'link');

// Only plain string parameters are meaningful; drop array-valued ones
// (e.g. page[]=x) so the string functions below never receive an array.
foreach (array('search', 'page') as $param) {
    if (isset($_GET[$param]) && !is_string($_GET[$param])) {
        unset($_GET[$param]);
    }
}
// $_GET is already URL-decoded by PHP, so only the base64 layer is removed here.
$pageurl = !empty($_GET['page']) ? (string) base64_decode($_GET['page']) : '';

// ---- cache lookup: the key is derived from the raw query string ----
$cachee = "cachepage/" . md5((isset($_SERVER["QUERY_STRING"]) ? $_SERVER["QUERY_STRING"] : '') . 'haha');
if (!file_exists("./cachepage/")) {
    mkdir('cachepage');
}
if (file_exists($cachee)) {
    if (!$_GET['page']) {
        $keeptime = 259200;   // home/search pages: keep for 3 days
    } else {
        $keeptime = 31536000; // proxied pages: keep for 1 year
    }
    if (time() - filectime($cachee) >= $keeptime) {
        unlink($cachee);
    } else {
        echo file_get_contents($cachee);
        exit;
    }
}

ob_start();
echo '<?xml version="1.0" encoding="UTF-8"?>';
?>
<!DOCTYPE html PUBLIC "-//WAPFORUM//DTD XHTML Mobile 1.2//EN" "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile12.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title><?php
// The decoded page URL is attacker-controlled; escape it before echoing.
echo 'WT ' . htmlspecialchars($pageurl, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
?></title>
<meta name="viewport" content="width=device-width, minimum-scale=1.0, maximum-scale=2.0"/>
<style type="text/css">body{font-size: 13px;line-height: 19px} p{margin: 3px;} a+a{display: block}</style>
</head>
<body>
<a href="?">回到主頁</a> <br />
<form action="" method="get"> 搜索小說: <input type="text" name="search" /> <button type='submit'>搜索</button> </form>
<?php
if ($_GET['search']) :
    $html = c_get('https://search2.booktxt.net/s.php', 'post', 't=1&keyword='.urlencode(_iconv($_GET['search'], 'GBK')));
elseif ($_GET['page']):
    // Only http(s) targets are fetched; anything else (file://, gopher://, ...)
    // is rejected before it reaches curl.
    // NOTE(review): this still allows requests to internal hosts; add a host
    // allow-list if this script is exposed publicly.
    $html = preg_match('~^https?://~i', $pageurl)
        ? c_get($pageurl)
        : array('', 'Invalid page URL', 0);
else:
    $html = c_get(_remoteurl_);
endif;

// The numeric status is used instead of searching the headers for "200 OK":
// HTTP/2 status lines carry no reason phrase.
if ($html[2] !== 200) {
    echo '服務器可能出現了點問題噢(⊙o⊙)?';
    echo "<br />返回:" . str_replace(PHP_EOL, '<br />', htmlspecialchars($html[1], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'));
    $cachee = false; // never cache an error page
} else {
    $html = _iconv($html[0]);
    // Drop <style>/<script> together with their content.
    $html = preg_replace('/<(style|script)[^>]*?>[\s\S]*?<\/\1>/i', '', $html);
    // Drop tags that the whitelist below would wrongly keep ("link" starts with "li").
    $html = preg_replace('/<(?:'._band_ele_.')[^>]*?>/i', '', $html);
    // Drop every tag whose name does not start with one of the whitelisted prefixes.
    $html = preg_replace('/<(?!a|\/a|p|\/p|br|li|\/li|table|\/table|td|\/td|tr|\/tr)(?:[^>]*?)>/i', '', $html);
    // Drop presentational / scripting attributes.
    $html = preg_replace('/(id|class|title|style|target|alt|onclick)=("|\').*?\2/i', '', $html);
    // Drop javascript: links and links whose URL matches _band_href_.
    $html = preg_replace('/<a[^>]*?=[^>]*?(?:javascript\:|'._band_href_.')[\s\S]*?<\/a>/i', '', $html);
    // Collapse whitespace runs.
    // NOTE(review): the second alternative may originally have been an HTML
    // entity that was rendered away when the script was published - confirm.
    $html = preg_replace('/[\n\r\s]+|( )+/i', ' ', $html);
    // Drop banned words.
    $html = preg_replace('/'._band_word_.'/i', '', $html);
    // Drop empty paragraphs and empty / attribute-less anchors.
    $html = preg_replace('/<p[^>]*?><\/p>|<a >.*?<\/a>|<a[^>]*?><\/a>/i', '', $html);

    // Rewrite every remaining href so that it is served through this script.
    preg_match_all('/<a[^>]*?href=("|\')([^>]*?)\1/i', $html, $links);
    $rep = [];
    $rem = [];
    foreach ($links[2] as $key => $value) {
        if (!(strlen($value) > 5)) {
            continue;
        }
        $target = rel2abs($value);
        if ($target === false) {
            continue; // fragment-only link: leave it untouched
        }
        $qt = $links[1][$key];
        $rep[] = $qt . $value . $qt;
        $rem[] = $qt . "?page=" . urlencode(base64_encode($target)) . $qt;
    }
    if ($rep) {
        $html = str_replace($rep, $rem, $html);
    }
    echo $html;
}
?>
<br />
<a href="?">回到主頁</a>
<small>[<?php echo date("y-m-d H:i:s"); ?>]</small>
</body>
</html><?php
$html = ob_get_clean();
echo $html;
if ($cachee) {
    file_put_contents($cachee, $html, LOCK_EX);
}

/**
 * Fetch a URL with curl.
 *
 * @param string $url            Absolute http(s) URL to request.
 * @param string $method         'get' or 'post'.
 * @param string $data           Request body, sent only when $method is 'post'.
 * @param string $referer        Value of the Referer header.
 * @param int    $timeout        Total timeout in seconds.
 * @param bool   $useck          Send cookies from $ckfile when the file exists.
 * @param bool   $saveck         Store received cookies in $ckfile.
 * @param string $ckfile         Cookie jar path.
 * @param int    $redirects_left Remaining manual 301/302 hops; bounds the recursion.
 * @return array [0 => body, 1 => raw response headers (or an error message),
 *               2 => HTTP status code as int, 0 when the transfer failed].
 */
function c_get($url, $method = 'get', $data = '', $referer = _remoteurl_, $timeout = 10, $useck = false, $saveck = false, $ckfile = "ck.txt", $redirects_left = 5)
{
    $headerinfo = array(
        "User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
    );
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headerinfo);
    curl_setopt($ch, CURLOPT_TIMEOUT_MS, $timeout * 1000);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_REFERER, $referer);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_HEADER, true);
    // NOTE(review): certificate verification is disabled, which allows
    // man-in-the-middle tampering; enable it if the targets have valid certificates.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    // Never let a URL or a redirect switch to file://, gopher://, etc.
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    if ($saveck == true) {
        curl_setopt($ch, CURLOPT_COOKIEJAR, $ckfile);
    }
    if (file_exists($ckfile) && $useck == true) {
        curl_setopt($ch, CURLOPT_COOKIEFILE, $ckfile);
    }
    if ($method == "post") {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $content = curl_exec($ch);
    // Failures use the same array shape as successes so callers can index
    // the result safely.
    if (curl_errno($ch)) {
        $error = 'Curl error: ' . curl_error($ch);
        curl_close($ch);
        return array('', $error, 0);
    }
    if (!$content) {
        curl_close($ch);
        return array('', "Get content false!", 0);
    }

    $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $status     = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    $header = substr($content, 0, $headerSize);
    $body   = (string) substr($content, $headerSize);

    // Fallback for the case where curl did not follow the redirect itself.
    if (($status === 301 || $status === 302)
        && $redirects_left > 0
        && preg_match("@location: (.*?)[\n\r;]@i", $header, $tmpgo)
    ) {
        return c_get(trim($tmpgo[1]), 'get', '', _remoteurl_, 10, false, false, "ck.txt", $redirects_left - 1);
    }

    return array($body, $header, $status);
}

/**
 * Resolve a link found in the fetched page to an absolute URL.
 *
 * The base is the URL in the `page` query parameter when that is a valid
 * http(s) URL, otherwise _remoteurl_.
 *
 * @param string $n Link target as written in the href attribute.
 * @return string|false Absolute URL, or false when nothing is left after
 *                      removing the fragment.
 */
function rel2abs($n)
{
    $dir    = _remoteurl_; // base for relative paths
    $root   = _remoteurl_; // base for root-relative paths
    $scheme = (stripos(_remoteurl_, 'https://') === 0) ? 'https' : 'http';

    if (!empty($_GET['page']) && is_string($_GET['page'])) {
        // No urldecode() here: PHP already decoded $_GET, and a second pass
        // would turn "+" into a space and corrupt the base64.
        $page = (string) base64_decode($_GET['page']);
        if (preg_match('~^(https?)://[^/?#]+~i', $page, $m)) {
            $scheme = strtolower($m[1]);
            $root   = $m[0] . '/';
            // Directory of the current page: the path up to and including its
            // last "/", ignoring any query string or fragment.
            $path  = preg_replace('~[?#].*$~s', '', (string) substr($page, strlen($m[0])));
            $slash = strrpos($path, '/');
            $dir   = $m[0] . ($slash === false ? '/' : substr($path, 0, $slash + 1));
        }
    }

    $n    = (string) $n;
    $hash = strpos($n, '#');
    if ($hash !== false) {
        $n = (string) substr($n, 0, $hash);
    }

    if (preg_match('~^https?://~i', $n)) {
        return $n;
    }
    if (substr($n, 0, 2) == "//") {
        return $scheme . ':' . $n; // protocol-relative: inherit the base scheme
    }
    if ($n === '') {
        return false;
    }
    if ($n[0] == '/') {
        return $root . substr($n, 1);
    }
    return $dir . $n;
}

/**
 * Convert a string, or every key and value of a (nested) array, to $output encoding.
 *
 * The source encoding is guessed per string with mb_detect_encoding(); when
 * the guess fails the string is returned unchanged.
 *
 * @param string|array $data   Text or array of texts to convert.
 * @param string       $output Target encoding name.
 * @return string|array Converted data, same shape as the input.
 */
function _iconv($data, $output = 'utf-8')
{
    if (is_array($data)) {
        $converted = array();
        foreach ($data as $key => $val) {
            $newKey = is_string($key) ? _iconv($key, $output) : $key;
            $converted[$newKey] = _iconv($val, $output);
        }
        return $converted;
    }

    $encode_arr = array('UTF-8', 'ASCII', 'GBK', 'GB2312', 'BIG5', 'JIS', 'eucjp-win', 'sjis-win', 'EUC-JP');
    $data    = (string) $data;
    $encoded = mb_detect_encoding($data, $encode_arr);
    if ($encoded === false) {
        return $data;
    }
    return mb_convert_encoding($data, $output, $encoded);
}