作業回顧
- 爬取分類下的圖書名和對應價格, 保存到books.txt
- books.toscrape.com
- 最終效果...
作業解析
- 兩個難度
- 簡單難度
- 爬取分類和分類下的第一頁的數據, 不包括分頁
- 所需要的知識已經講過, 不需要額外的知識
- 中等難度
- 爬取分類和分類下的全部數據, 需要判斷總頁數
- 需要額外的知識: 字符串切割/截取
作業分析
- 先獲取分類的名稱和url
- 再通過url獲取分類下的圖書信息
- 注意分類的url需要拼接
小彩蛋: 運行前清屏
代碼解析-簡單難度-帶註釋
<?php
// Load the autoloader from vendor/ (presumably Composer's) so QL\QueryList can be resolved.
require 'vendor/autoload.php';
use QL\QueryList;
// One shared QueryList instance; the functions below reach it through `global $ql`.
$ql = new QueryList();
/**
 * Fetch the page at $url and collect the category links from its sidebar.
 *
 * @param string $url Base URL of the site. Every category href found on the
 *                    page is appended to it to build the category URL.
 * @return array Rows as produced by queryData(); each row carries
 *               'category_name' (link text) and 'category_url' (joined URL).
 */
function get_category($url) {
    global $ql;
    // Name and URL are read from the same <a> elements, so the selector is shared.
    $link_selector = '#default > div > div > div > aside > div.side_categories > ul > li > ul > li > a';
    $data = $ql->get($url)->rules([
        "category_name" => [$link_selector, 'text'],
        "category_url" => [$link_selector, 'href'],
    ])->queryData();
    // Join with exactly one "/" so the result is well-formed whether or not
    // $url ends with a slash (and whether or not the href starts with one).
    $base = rtrim($url, '/');
    foreach ($data as $key => $value) {
        $value['category_url'] = $base . '/' . ltrim($value['category_url'], '/');
        $data[$key] = $value;
    }
    return $data;
}
/**
 * Fetch a single category page and extract its books.
 *
 * @param string $url URL of the category page to load.
 * @return array Rows as produced by queryData(); each row carries
 *               'book_name' (the link's title attribute) and
 *               'book_price' (text of the price element).
 */
function get_book($url) {
    global $ql;
    // Extraction rules: field name => [CSS selector, attribute to read].
    $book_rules = [
        "book_name" => ['#default > div > div > div > div > section > div:nth-child(2) > ol > li > article > h3 > a', 'title'],
        "book_price" => ['#default > div > div > div > div > section > div:nth-child(2) > ol > li> article > div.product_price > p.price_color', 'text'],
    ];
    $page = $ql->get($url);
    return $page->rules($book_rules)->queryData();
}
/**
 * Attach the scraped book list to every category row.
 *
 * Prints each category URL (one per line) before fetching it, then stores the
 * result of get_book() under the 'books' key of that row.
 *
 * @param array $data Category rows, each with a 'category_url' entry.
 * @return array The same rows, each extended with a 'books' entry.
 */
function make_array($data) {
    foreach ($data as &$category) {
        $category_url = $category['category_url'];
        echo $category_url."\n";
        $category['books'] = get_book($category_url);
    }
    // Drop the loop reference so later writes cannot alias the last row.
    unset($category);
    return $data;
}
/**
 * Write the scraped categories and their books to books.txt.
 *
 * The file is opened in 'w+' mode in the current working directory, so any
 * existing content is replaced. Each category name is written on its own
 * line, followed by one tab-indented line per book: "<name>" <price>.
 *
 * @param array $data Category rows, each with 'category_name' and a 'books'
 *                    list whose items have 'book_name' and 'book_price'.
 * @return void
 * @throws \RuntimeException If books.txt cannot be opened.
 */
function make_txt($data) {
    $txt_obj = fopen('books.txt', 'w+');
    // fopen() returns false on failure; stop here with a clear error instead
    // of passing false on to fwrite()/fclose().
    if ($txt_obj === false) {
        throw new \RuntimeException('Unable to open books.txt for writing');
    }
    try {
        foreach ($data as $value) {
            $category_name = $value['category_name'];
            fwrite($txt_obj, "{$category_name}\n");
            foreach ($value['books'] as $book) {
                $book_name = $book['book_name'];
                $book_price = $book['book_price'];
                fwrite($txt_obj, "\t\"{$book_name}\" {$book_price}\n");
            }
        }
    } finally {
        // Release the handle even if a row is malformed and a write throws.
        fclose($txt_obj);
    }
}
// Entry point: list the categories, attach each category's books, write books.txt.
$data = get_category('http://books.toscrape.com/');
$data = make_array($data);
make_txt($data);
複製代碼
代碼解析-中等難度-帶註釋
<?php
// Load the autoloader from vendor/ (presumably Composer's) so QL\QueryList can be resolved.
require 'vendor/autoload.php';
use QL\QueryList;
// One shared QueryList instance; the functions below reach it through `global $ql`.
$ql = new QueryList();
/**
 * Fetch the page at $url and collect the category links from its sidebar.
 *
 * @param string $url Base URL of the site. Every category href found on the
 *                    page is appended to it to build the category URL.
 * @return array Rows as produced by queryData(); each row carries
 *               'category_name' (link text) and 'category_url' (joined URL).
 */
function get_category($url) {
    global $ql;
    // Name and URL are read from the same <a> elements, so the selector is shared.
    $link_selector = '#default > div > div > div > aside > div.side_categories > ul > li > ul > li > a';
    $data = $ql->get($url)->rules([
        "category_name" => [$link_selector, 'text'],
        "category_url" => [$link_selector, 'href'],
    ])->queryData();
    // Join with exactly one "/" so the result is well-formed whether or not
    // $url ends with a slash (and whether or not the href starts with one).
    $base = rtrim($url, '/');
    foreach ($data as $key => $value) {
        $value['category_url'] = $base . '/' . ltrim($value['category_url'], '/');
        $data[$key] = $value;
    }
    return $data;
}
/**
 * Collect the books of a category across all of its pages.
 *
 * Starting at $url, each page is fetched once, its URL is printed, its books
 * are extracted, and the "next" pagination link is followed until a page has
 * none. The next link is read from the page that was just loaded, so no
 * second download of the same page is needed, and the pages are walked in a
 * loop rather than by recursion.
 *
 * @param string $url URL of the first page of the category.
 * @return array Book rows from all pages in page order; each row carries
 *               'book_name' and 'book_price'.
 */
function get_book($url) {
    global $ql;
    // Extraction rules: field name => [CSS selector, attribute to read].
    $book_rules = [
        "book_name" => ['#default > div > div > div > div > section > div:nth-child(2) > ol > li > article > h3 > a', 'title'],
        "book_price" => ['#default > div > div > div > div > section > div:nth-child(2) > ol > li> article > div.product_price > p.price_color', 'text'],
    ];
    $next_selector = '#default > div > div > div > div > section > div:nth-child(2) > div > ul > li.next > a';
    $pages = [];
    for (;;) {
        echo $url."\n";
        $page = $ql->get($url);
        $pages[] = $page->rules($book_rules)->queryData();
        // NOTE(review): relies on QueryList keeping the loaded document
        // available for find() after queryData() — confirm against the
        // installed QueryList version.
        $next = $page->find($next_selector)->href;
        if (!$next) {
            break;
        }
        // The next link is relative to the current page: swap the last path segment.
        $segments = explode('/', $url);
        $segments[count($segments) - 1] = $next;
        $url = implode('/', $segments);
    }
    // A single page is returned untouched; several pages are concatenated in order.
    return count($pages) === 1 ? $pages[0] : array_merge(...$pages);
}
/**
 * Look up the "next page" pagination link on the page at $url.
 *
 * @param string $url URL of the page to load.
 * @return mixed The href of the `li.next > a` element. The caller treats a
 *               falsy value as "no next page"; presumably that is what comes
 *               back when the element is absent — verify against QueryList.
 */
function has_next($url){
    global $ql;
    $next_link = $ql->get($url)->find('#default > div > div > div > div > section > div:nth-child(2) > div > ul > li.next > a');
    return $next_link->href;
}
/**
 * Attach the scraped book list to every category row.
 *
 * Prints each category URL (one per line) before fetching it, then stores the
 * result of get_book() under the 'books' key of that row.
 *
 * @param array $data Category rows, each with a 'category_url' entry.
 * @return array The same rows, each extended with a 'books' entry.
 */
function make_array($data) {
    foreach ($data as &$category) {
        $category_url = $category['category_url'];
        echo $category_url."\n";
        $category['books'] = get_book($category_url);
    }
    // Drop the loop reference so later writes cannot alias the last row.
    unset($category);
    return $data;
}
/**
 * Write the scraped categories and their books to books.txt.
 *
 * The file is opened in 'w+' mode in the current working directory, so any
 * existing content is replaced. Each category name is written on its own
 * line, followed by one tab-indented line per book: "<name>" <price>.
 *
 * @param array $data Category rows, each with 'category_name' and a 'books'
 *                    list whose items have 'book_name' and 'book_price'.
 * @return void
 * @throws \RuntimeException If books.txt cannot be opened.
 */
function make_txt($data) {
    $txt_obj = fopen('books.txt', 'w+');
    // fopen() returns false on failure; stop here with a clear error instead
    // of passing false on to fwrite()/fclose().
    if ($txt_obj === false) {
        throw new \RuntimeException('Unable to open books.txt for writing');
    }
    try {
        foreach ($data as $value) {
            $category_name = $value['category_name'];
            fwrite($txt_obj, "{$category_name}\n");
            foreach ($value['books'] as $book) {
                $book_name = $book['book_name'];
                $book_price = $book['book_price'];
                fwrite($txt_obj, "\t\"{$book_name}\" {$book_price}\n");
            }
        }
    } finally {
        // Release the handle even if a row is malformed and a write throws.
        fclose($txt_obj);
    }
}
// Entry point: list the categories, attach each category's books, write books.txt.
$data = get_category('http://books.toscrape.com/');
$data = make_array($data);
make_txt($data);
複製代碼
下一節