1. https://blog.csdn.net/qq_24076135/article/details/78045034
2. http://www.vogella.com/tutorials/JavaRegularExpressions/article.html#java-regex-examples
3. https://www.w3cschool.cn/java/java-regex-character-classes.html
4. 提取文檔內容:
package com.happySpider;
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
/**
 * Minimal page fetcher: downloads the text served at a fixed URL, echoes it to standard output,
 * and saves a copy to a timestamp-named file under a fixed output directory.
 */
public class Main {

    /**
     * Reads the target URL line by line (decoded as UTF-8), prints every line to standard
     * output, and writes the same line to {@code D:/happySpider/<currentTimeMillis>.doc}
     * (encoded as UTF-8).
     *
     * <p>Any {@link IOException} raised while connecting, reading, or writing is caught and its
     * stack trace is printed; the method then returns normally.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String urlTarget = "http://yun.52tencent.com:808/api/simple/nuomi/eat/meishi/2";
        String happyOutputPath = "D:/happySpider/";
        try {
            URL happyUrl = new URL(urlTarget);
            URLConnection happyConnect = happyUrl.openConnection();

            // try-with-resources closes the reader (and the underlying connection stream) even
            // when reading or writing fails part-way through.
            try (BufferedReader happyBuffer = new BufferedReader(
                    new InputStreamReader(happyConnect.getInputStream(), StandardCharsets.UTF_8))) {

                // Create the output directory on first use; without it the file open below
                // fails with FileNotFoundException.
                new File(happyOutputPath).mkdirs();

                // The millisecond timestamp keeps file names from separate runs distinct
                // (two runs within the same millisecond would still collide).
                File happyTarget = new File(happyOutputPath + System.currentTimeMillis() + ".doc");

                // Encode the output explicitly as UTF-8 so the saved copy matches the decoded
                // input regardless of the platform default charset.
                try (PrintWriter happyOutputFile = new PrintWriter(
                        new OutputStreamWriter(new FileOutputStream(happyTarget), StandardCharsets.UTF_8))) {
                    String happyLine;
                    while ((happyLine = happyBuffer.readLine()) != null) {
                        System.out.println(happyLine);
                        happyOutputFile.println(happyLine);
                    }
                    // PrintWriter never throws on write failure; surface it explicitly.
                    if (happyOutputFile.checkError()) {
                        throw new IOException("Failed while writing " + happyTarget);
                    }
                }
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}