上一個例子是橫向爬取,這個例子是縱向爬取;具體怎麼做,請看下面的代碼(以百度貼吧的 vue 吧為例)。
// Command main is a small concurrent crawler for the Baidu Tieba "vue" forum.
//
// For every listing page in a user-supplied range it downloads the HTML,
// extracts the text captured by replyerRe, and writes the captures to a
// per-page text file in the current working directory.
package main

import (
	"bufio"
	"fmt"
	"io"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"time"
)

// httpClient is shared by all fetches. The timeout keeps a stalled server
// from blocking a worker goroutine (and therefore toWork) indefinitely.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// replyerRe captures whatever follows the title prefix of a "j_replyer" span,
// up to the closing double quote. It is compiled once at package scope
// instead of once per crawled page.
var replyerRe = regexp.MustCompile(`<span class="tb_icon_author_rely j_replyer" title="最後回覆人:(?s:(.*?))"`)

// HttpGetDB downloads url and returns the whole response body as a string.
//
// It returns an error if the request fails, if the server answers with a
// non-2xx status, or if reading the body fails. On error the returned string
// is empty; a partially read body is never returned.
func HttpGetDB(url string) (result string, err error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}

	// io.ReadAll reports every non-EOF read error and avoids the quadratic
	// cost of growing a string with += chunk by chunk.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}

// writeNames writes the header line followed by the first capture group of
// every match, one per line, to w. Matches without a capture group are
// skipped. The first write error, if any, is returned.
func writeNames(w io.Writer, matches [][]string) error {
	bw := bufio.NewWriter(w)
	// bufio.Writer errors are sticky: once a write fails, later writes are
	// no-ops and Flush returns that first error, so checking Flush suffices.
	bw.WriteString("名稱\n")
	for _, m := range matches {
		if len(m) < 2 {
			continue
		}
		bw.WriteString(m[1])
		bw.WriteByte('\n')
	}
	return bw.Flush()
}

// Save2file writes the captures in fileName to a text file named after page
// idx in the current working directory, replacing any existing file.
//
// fileName is expected to have the shape returned by
// (*regexp.Regexp).FindAllStringSubmatch: element [i][1] is the first capture
// group of match i. Failures are reported on standard output.
func Save2file(idx int, fileName [][]string) {
	path := "第" + strconv.Itoa(idx) + "頁" + ".txt"
	f, err := os.Create(path)
	if err != nil {
		fmt.Println("os.Create err", err)
		return
	}

	err = writeNames(f, fileName)
	// A failed Close can mean buffered data never reached the disk, so it is
	// reported too, unless an earlier write error already explains it.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		fmt.Println("Save2file write err", path, err)
	}
}

// pageURL returns the listing URL for the 1-based page idx. The pn query
// parameter advances by 50 per page.
func pageURL(idx int) string {
	return "https://tieba.baidu.com/f?kw=vue&ie=utf-8&pn=" + strconv.Itoa((idx-1)*50)
}

// SpiderPageDB crawls listing page idx, saves the extracted captures with
// Save2file, and then sends idx on page.
//
// idx is sent exactly once whether or not the crawl succeeded, so a caller
// that waits for one value per started worker can never block forever on a
// failed page. Failures are reported on standard output.
func SpiderPageDB(idx int, page chan int) {
	defer func() { page <- idx }()

	result, err := HttpGetDB(pageURL(idx))
	if err != nil {
		fmt.Println("HttpGet2 err", err)
		return
	}
	Save2file(idx, replyerRe.FindAllStringSubmatch(result, -1))
}

// toWork crawls pages start through end (inclusive) concurrently, one
// goroutine per page, and prints a line for each page as its worker finishes.
// A worker also reports back after a failed fetch; the failure itself is
// printed by SpiderPageDB.
func toWork(start, end int) {
	fmt.Printf("正在爬取%d到%d頁。。。\n", start, end)

	page := make(chan int)
	for i := start; i <= end; i++ {
		go SpiderPageDB(i, page)
	}
	// Receive exactly one value per started worker.
	for i := start; i <= end; i++ {
		fmt.Printf("第%d頁爬取完成\n", <-page)
	}
}

// main reads the first and last page numbers from standard input, validates
// them, and starts the crawl.
func main() {
	var start, end int

	fmt.Print("請輸入起始頁(>=1):")
	if _, err := fmt.Scan(&start); err != nil {
		fmt.Println("invalid start page:", err)
		return
	}

	fmt.Print("請輸入終止頁(>=start):")
	if _, err := fmt.Scan(&end); err != nil {
		fmt.Println("invalid end page:", err)
		return
	}

	if start < 1 || end < start {
		fmt.Println("invalid page range:", start, end)
		return
	}
	toWork(start, end)
}