package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"
)

// This program downloads a range of Tieba list pages one after another and
// stores each one as a local HTML file in the current directory.

// listURLPrefix is the list-page URL up to (and including) "pn=", the query
// parameter whose value selects the page.
const listURLPrefix = "https://tieba.baidu.com/f?kw=vue&ie=utf-8&pn="

// pageStep is the amount by which pn grows from one page to the next
// (presumably the number of threads shown per list page — confirm).
const pageStep = 50

// httpClient is shared by every request. The timeout keeps a stalled server
// from blocking the crawl forever.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// HttpGet fetches url and returns the complete response body as a string.
//
// A non-nil error is returned when the request fails, when the server answers
// with a non-2xx status, or when reading the body fails. result is empty in
// all of those cases, so a truncated page is never mistaken for a full one.
func HttpGet(url string) (result string, err error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}

	// io.Copy reads until EOF and reports any other read error; the builder
	// avoids re-copying the whole page on every chunk.
	var body strings.Builder
	if _, err = io.Copy(&body, resp.Body); err != nil {
		return "", err
	}
	fmt.Println("讀取網頁完成")
	return body.String(), nil
}

// pageURL returns the URL of list page index (1-based): pn is (index-1)*pageStep.
func pageURL(index int) string {
	return listURLPrefix + strconv.Itoa((index-1)*pageStep)
}

// pageFileName returns the name of the file that list page index is saved to.
func pageFileName(index int) string {
	return "第" + strconv.Itoa(index) + "頁" + ".html"
}

// savePage creates (or truncates) the file name and writes content to it.
// Write and close errors are both reported, so a short write is not silent.
func savePage(name, content string) (err error) {
	f, err := os.Create(name)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()
	_, err = f.WriteString(content)
	return err
}

// working downloads pages start through end (inclusive) sequentially and
// saves each one. A page that cannot be fetched or written is reported on
// standard output and skipped; the remaining pages are still processed.
func working(start, end int) {
	fmt.Printf("正在爬取第%d到第%d頁", start, end)
	// Crawl every page in the range.
	for i := start; i <= end; i++ {
		result, err := HttpGet(pageURL(i))
		if err != nil {
			fmt.Println("httpGet err", err)
			continue
		}
		if err := savePage(pageFileName(i), result); err != nil {
			fmt.Println("savePage err", err)
			continue
		}
	}
}

// main reads the first and last page number from standard input and crawls
// that range. It stops without crawling when either number cannot be read.
func main() {
	var start, end int
	fmt.Print("請輸入起始頁。。。")
	if _, err := fmt.Scan(&start); err != nil {
		fmt.Println("Scan err", err)
		return
	}
	fmt.Print("請輸入終止頁。。。")
	if _, err := fmt.Scan(&end); err != nil {
		fmt.Println("Scan err", err)
		return
	}
	working(start, end)
}
下面這個版本只是在上面的基礎上加了管道（channel），並開了協程（goroutine）來併發爬取：
package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"
)

// This program downloads a range of Tieba list pages concurrently, one
// goroutine per page, and stores each one as a local HTML file in the current
// directory.

// listURLPrefix is the list-page URL up to (and including) "pn=", the query
// parameter whose value selects the page.
const listURLPrefix = "https://tieba.baidu.com/f?kw=vue&ie=utf-8&pn="

// pageStep is the amount by which pn grows from one page to the next
// (presumably the number of threads shown per list page — confirm).
const pageStep = 50

// maxConcurrentFetches caps how many pages are downloaded at the same time so
// that a large page range cannot exhaust sockets or file descriptors.
const maxConcurrentFetches = 8

// httpClient is shared by every request. The timeout keeps a stalled server
// from blocking a worker forever.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// HttpGet fetches url and returns the complete response body as a string.
//
// A non-nil error is returned when the request fails, when the server answers
// with a non-2xx status, or when reading the body fails. result is empty in
// all of those cases, so a truncated page is never mistaken for a full one.
func HttpGet(url string) (result string, err error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}

	// io.Copy reads until EOF and reports any other read error; the builder
	// avoids re-copying the whole page on every chunk.
	var body strings.Builder
	if _, err = io.Copy(&body, resp.Body); err != nil {
		return "", err
	}
	// Same output as before (message followed by an empty line), written with
	// Print because Println with a trailing "\n" is rejected by go vet.
	fmt.Print("讀取網頁完成\n\n")
	return body.String(), nil
}

// pageURL returns the URL of list page index (1-based): pn is (index-1)*pageStep.
func pageURL(index int) string {
	return listURLPrefix + strconv.Itoa((index-1)*pageStep)
}

// pageFileName returns the name of the file that list page index is saved to.
func pageFileName(index int) string {
	return "第" + strconv.Itoa(index) + "頁" + ".html"
}

// savePage creates (or truncates) the file name and writes content to it.
// Write and close errors are both reported, so a short write is not silent.
func savePage(name, content string) (err error) {
	f, err := os.Create(name)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()
	_, err = f.WriteString(content)
	return err
}

// SpiderPage downloads list page index, saves it to pageFileName(index) and
// then sends index on page to announce success.
//
// Nothing is sent when the download or the write fails (the failure is printed
// instead), so a receiver must not wait for exactly one value per call.
func SpiderPage(index int, page chan int) {
	fmt.Printf("正在爬取第%d到頁\n", index)
	result, err := HttpGet(pageURL(index))
	if err != nil {
		fmt.Println("httpGet err", err)
		return
	}
	if err := savePage(pageFileName(index), result); err != nil {
		fmt.Println("savePage err", err)
		return
	}
	page <- index
}

// runSpiders calls spider once for every page in [start, end], each call in
// its own goroutine with at most maxConcurrentFetches running at a time. It
// prints a line for every index a spider sends and returns those indices in
// arrival order.
//
// The channel is closed once every spider has returned, so runSpiders always
// terminates even when some spiders fail and never send.
func runSpiders(start, end int, spider func(index int, page chan int)) []int {
	page := make(chan int)
	slots := make(chan struct{}, maxConcurrentFetches)

	// Launch from a separate goroutine: a finished spider keeps its slot until
	// its send on the unbuffered channel is received, so the receive loop
	// below must already be running while later spiders wait for a slot.
	go func() {
		var wg sync.WaitGroup
		for i := start; i <= end; i++ {
			slots <- struct{}{}
			wg.Add(1)
			go func(index int) {
				defer wg.Done()
				defer func() { <-slots }()
				spider(index, page)
			}(i)
		}
		wg.Wait()
		close(page)
	}()

	var done []int
	for index := range page {
		fmt.Printf("第%d個頁面爬取完成\n", index)
		done = append(done, index)
	}
	return done
}

// working2 crawls pages start through end (inclusive) concurrently and
// returns once every page has either been saved or been reported as failed.
func working2(start, end int) {
	fmt.Printf("正在爬取第%d頁到%d頁\n", start, end)
	runSpiders(start, end, SpiderPage)
}

// main reads the first and last page number from standard input and crawls
// that range. It stops without crawling when either number cannot be read.
func main() {
	var start, end int
	fmt.Print("請輸入起始頁。。。")
	if _, err := fmt.Scan(&start); err != nil {
		fmt.Println("Scan err", err)
		return
	}
	fmt.Print("請輸入終止頁。。。")
	if _, err := fmt.Scan(&end); err != nil {
		fmt.Println("Scan err", err)
		return
	}
	working2(start, end)
}