話說本機保存了大量的網頁,多半是技術文章。
瀏覽器自帶的保存方式「還原度」不高,把內容粘貼到 Word 吧又太費勁。雖然聽說有一些現成的工具,但還是決定自己造個輪子……
網頁本身、引用的腳本文件(默認不下載)、樣式文件(樣式文件裏引用的圖片也會下載下來)、圖片等都可以下載下來,然後再把網頁、樣式文件裏的路徑替換爲本地路徑。
我得承認,使用起來有點費勁:需要先把網址複製到剪貼板然後敲下回車,再把「實際」的網頁內容複製到剪貼板然後再敲下回車,工具自身會去讀取系統剪貼板裏的內容。
/**
 * Command-line entry point of the offline web-page saver.
 *
 * Flow:
 *   1. Read the page URL, then the page's actual HTML, from the system clipboard
 *      (the user presses Enter after copying each one).
 *   2. Create a timestamp-named resource directory.
 *   3. "Dig" the resource addresses out of the HTML, download them, and write an
 *      HTML file whose addresses point at the local copies.
 *   4. Once the downloads finish, scan every downloaded .css file, download the
 *      resources it references and rewrite its addresses to local ones.
 *
 * Options: --help, --lang=<en|cn>, --name=<html file name prefix>,
 *          --no-verbose / --verbose=false.
 */
const argv = require('minimist')(process.argv.slice(2));
const log = require('console').log;
const readlineSync = require('readline-sync');
const clipboard = require('clipboardy');
const shell = require('shelljs');

const fs = require('fs');
const path = require('path');

const digger = require('./digger.js');
const parser = require('./parser.js');
const getter = require('./getter.js');
const cssDigger = require('./cssDigger.js');
const cssParser = require('./cssParser.js');

const cfg = require('./configs/config.json');
const en = require('./configs/en.json');
const cn = require('./configs/cn.json');

// UI language: Chinese ('cn') or English ('en'); anything else falls back to English.
const langCfg = getLang(argv['lang']) || getLang('en');

if (argv['help']) { // show the help text and stop
    logContent(langCfg.intro);
    // Help was explicitly requested, so this is a successful run.
    process.exit(0);
}

// Prefix of the generated HTML file name.
const name = argv['name'] ? argv['name'] : '';
// Whether to print details of each downloaded file. Defaults to true; it is only
// turned off by --no-verbose (minimist yields false), --verbose=false or --verbose=0.
const verbose = ![false, 'false', 0].includes(argv['verbose']);

log(langCfg.help);
log(langCfg.lang);
log(langCfg.name);
log(langCfg.verbose);

waitAnyKey(langCfg.askURL);
// Page URL from the clipboard; trimmed because copied text often carries a trailing newline.
const url = getFromClipboard().trim();
waitAnyKey(langCfg.askContent);
// Actual page content (HTML) from the clipboard.
const content = getFromClipboard();

const dest = genIdentifier();
// Create the directory that holds the resource files (portable, unlike `md`).
shell.mkdir('-p', dest);

digger.config(cfg.html, content);
digger.dig();
// Addresses found in the page that need to be downloaded.
const originalAddrList = digger.originalAddrList();

parser.config(cfg.html, dest, getDomain(url), originalAddrList);
parser.parse(); // builds the real download addresses and the local replacement paths

const done = getter.downloadAllOfThese(verbose, 'FROM HTML: ', parser.downloadAddrList(), parser.saveAddrList());

makeHtmlFile(name, content); // write the final HTML file

done.then(() => {
    // All HTML resources are downloaded: walk the resource directory and "dig" each style sheet.
    const cssDownloads = [];

    fs.readdirSync(dest).forEach((file) => {
        if (!file.endsWith('.css')) {
            return;
        }

        const cssContent = fs.readFileSync(path.join(dest, file)).toString();

        cssDigger.reset();
        cssDigger.config(cfg.css, cssContent);
        cssDigger.dig();
        const cssAddrList = cssDigger.originalAddrList();

        cssParser.reset();
        cssParser.config(cfg.css, dest, path.join(dest, file), parser.cssBox(), cssAddrList);
        cssParser.parse();

        if (cssParser.downloadAddrList().length > 0) {
            cssDownloads.push(getter.downloadAllOfThese(verbose, 'FROM CSS: ', cssParser.downloadAddrList(), cssParser.saveAddrList()));

            // Rename the original style sheet and write a new one whose addresses are local.
            makeCssFile(file, cssContent, cssAddrList, cssParser.replaceAddrList());
        }
    });

    // Keep the chain alive until the CSS resources are fetched so failures are reported below.
    return Promise.all(cssDownloads);
}).catch((err) => {
    log(err);
    process.exitCode = 1;
});

/**
 * Looks up the language configuration for a language code.
 * @param {string} lang - 'en' or 'cn'.
 * @returns {object|undefined} The language configuration, or undefined for an unknown code.
 */
function getLang(lang) {
    const table = { 'en': en, 'cn': cn };

    // Own-property check so keys such as 'constructor' do not resolve to Object.prototype members.
    return Object.prototype.hasOwnProperty.call(table, lang) ? table[lang] : undefined;
}

/**
 * Logs every item of a list on its own line.
 * @param {Array} content - Items to print.
 */
function logContent(content) {
    content.forEach((item) => {
        log(item);
    });
}

/**
 * Shows a prompt and blocks until the user presses Enter.
 * @param {string} queryText - Prompt text.
 */
function waitAnyKey(queryText) {
    readlineSync.question(queryText);
}

/**
 * Reads the current text content of the system clipboard.
 * @returns {string} Clipboard text.
 */
function getFromClipboard() {
    return clipboard.readSync();
}

/**
 * Extracts the scheme + host (+ port) part of an http(s) URL.
 * @param {string} url - Absolute URL of the saved page.
 * @returns {string} The origin, e.g. 'https://example.com'.
 * @throws {Error} If the URL cannot be parsed or is not http/https.
 */
function getDomain(url) {
    let parsed;

    try {
        parsed = new URL(url);
    } catch (err) {
        throw new Error('Invalid URL: ' + url);
    }

    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
        throw new Error('Only http:// and https:// URLs are supported: ' + url);
    }

    return parsed.origin;
}

/**
 * Builds a yyyyMMddHHmmss identifier from the current local time.
 * @returns {string} 14-character timestamp.
 */
function genIdentifier() {
    const d = new Date();
    // All fields are local time (getDate, not getUTCDate) so the parts stay consistent.
    const parts = [d.getFullYear(), d.getMonth() + 1, d.getDate(), d.getHours(), d.getMinutes(), d.getSeconds()];

    return parts.map((part) => ifNeedZero(part)).join('');
}

/**
 * Left-pads a value below 10 with a single '0'.
 * @param {number|string} number - Value to format.
 * @returns {string} The value as a string, at least two characters for 0-9.
 */
function ifNeedZero(number) {
    return number < 10 ? '0' + number.toString() : number.toString();
}

/**
 * Replaces every occurrence of each original address with its local counterpart.
 * @param {string} content - Text to rewrite.
 * @param {string[]} originalAddrList - Addresses as they appear in the text.
 * @param {string[]} replaceAddrList - Replacement for the address at the same index.
 * @returns {string} The rewritten text.
 */
function replace(content, originalAddrList, replaceAddrList) {
    originalAddrList.forEach((addr, idx) => {
        if (addr === '') {
            return; // an empty address would match at every position
        }
        // Literal replace-all: no regex is built, so '.', '+', '(', '$' etc. in either string are taken verbatim.
        content = content.split(addr).join(replaceAddrList[idx]);
    });

    return content;
}

/**
 * Writes text to a file, replacing any existing file.
 * @param {string} name - Target file path.
 * @param {string} content - Text to write.
 */
function makeFile(name, content) {
    fs.writeFileSync(name, content);
}

/**
 * Writes the final HTML file '<name>_<dest>.html' with addresses rewritten to local paths.
 * When cfg.killScript is set, script elements are stripped or neutralised first.
 * @param {string} name - File name prefix.
 * @param {string} content - Page HTML.
 */
function makeHtmlFile(name, content) {
    if (cfg.killScript) {
        // Single-line script elements are removed outright ('.' does not match newlines) ...
        content = content.replace(new RegExp('<script.*?</script>', 'g'), '');
        content = content.replace(new RegExp('<script.*?/>', 'g'), '');
        // ... and the remaining (multi-line) ones are turned into hidden textareas.
        content = content.replace(new RegExp('<script', 'g'), '<textarea style="display:none" ');
        content = content.replace(new RegExp('</script>', 'g'), '</textarea>');
    }

    content = replace(content, originalAddrList, parser.replaceAddrList());
    makeFile(name + '_' + dest + '.html', '<!-- Current URL: ' + url + '-->\r\n' + content);
}

/**
 * Keeps the downloaded style sheet as '<name>.xss' and writes a new '<name>' whose
 * addresses are rewritten to local paths.
 * @param {string} name - Style sheet file name inside the resource directory.
 * @param {string} content - Original style sheet text.
 * @param {string[]} originalArr - Addresses found in the style sheet.
 * @param {string[]} replaceArr - Local replacement for the address at the same index.
 */
function makeCssFile(name, content, originalArr, replaceArr) {
    fs.renameSync(path.join(dest, name), path.join(dest, name + '.xss'));

    content = replace(content, originalArr, replaceArr);
    makeFile(path.join(dest, name), content);
}