# Scrape movie data with BeautifulSoup and store it in MySQL

import requests
from bs4 import BeautifulSoup
import pymysql
from qiniu import Auth
from qiniu import BucketManager
import datetime
import time

# --- Qiniu cloud storage setup -------------------------------------------
# BUG FIX: the original referenced access_key / secret_key without ever
# defining them (NameError at runtime). Fill in real credentials here.
access_key = 'your_access_key'
secret_key = 'your_secret_key'
q = Auth(access_key, secret_key)
# Target bucket that will hold the mirrored movie poster images.
bucket_name = 'bucket_name'
bucket = BucketManager(q)

# --- MySQL connection -----------------------------------------------------
db = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='',
                     db='movies', charset='utf8')
# Cursor used for all inserts below; closed at the end of the script.
cur = db.cursor()

# Fetch the first listing page of HD movies.
url = 'http://www.mp4pa.com/dy/hd3.html'
res = requests.get(url, timeout=10)
res.encoding = 'utf-8'

soup = BeautifulSoup(res.text, 'lxml')

# Every <li> under .layout_newlist4 is one movie entry on this page.
lis = soup.find(class_='layout_newlist4').find_all('li')
for li in lis:
    # Movie title: first whitespace-separated token of the .titl text.
    movie_name = li.select('.titl')[0].get_text().strip().split(' ')[0]

    # Detail-page URL (the href on the site is relative).
    movie_link = 'http://www.mp4pa.com' + li.find(class_='titl')['href'].strip()
    res = requests.get(movie_link, timeout=10)
    res.encoding = 'utf-8'
    # Use a separate name so the listing-page soup is not shadowed.
    detail_soup = BeautifulSoup(res.text, 'lxml')

    # Poster image and Baidu-Yun download link from the detail page.
    info = detail_soup.find(class_='layout_newlist4')
    movie_img_link = info.find('img')['src']
    # BUG FIX: baiduyun was never reset per iteration, so a movie without a
    # Baidu link raised NameError (first movie) or silently reused the
    # previous movie's link. Reset it here and stop at the first match.
    baiduyun = ''
    for a in info.find_all(class_='videourl1'):
        href = a.find('a')['href']
        if 'baidu' in href:
            baiduyun = href.strip()
            break

    # Mirror the poster into Qiniu; key is date-scoped with a unix-time name.
    key = ('movie/logo/' + datetime.datetime.now().strftime('%Y%m%d')
           + '/' + str(int(time.time())) + '.png')
    bucket.fetch(movie_img_link, bucket_name, key)

    # Persist the record (parameterized query — safe against SQL injection).
    dateTime = datetime.datetime.now().strftime('%Y-%m-%d')
    # Distinct variable: the original overwrote `key` with the public URL.
    logo_url = 'http://og23vuzev.bkt.clouddn.com/' + key
    cur.execute(
        "insert into movie(movie_name,movie_logo,movie_public_time,"
        "baiduyun_link,created_at,updated_at) values(%s,%s,%s,%s,%s,%s)",
        (movie_name, logo_url, dateTime, baiduyun, dateTime, dateTime))

# Flush all pending inserts in a single transaction.
db.commit()
# Release the cursor, then the connection itself.
cur.close()
db.close()
# (end of script)