Python實現商品價格監控，識破雙十一的套路

時間 2019-11-08

標籤 python 實現商品價格監控識破雙十套路欄目 Python 简体版

原文原文鏈接

一年一度的「雙十一」又要來了，很多人已經開始摩拳擦掌，畢竟幾天以後手還在不在就很難說了。

各類社交軟件也跟着遭殃，整天就是「來幫我一起蓋樓」，各種字體繞過屏蔽，什麼奇葩的腦洞都出來了。不過也感謝這些電商平臺，讓多年未聯繫的好友、加過但沒有對話的陌生人都找到了打破尷尬的話題。（讓場面更加尷尬）

月薪上萬的白領們爲了2塊5毛錢的優惠券起早貪黑，也是堪稱人類迷惑行爲大賞了……

問題是，你覺得自己真的賺到了？

商品「明降暗升」的傳言早有耳聞：很多商品在雙十一之前早早地把價格調高，加上優惠之後也不過就是跟之前的原價相當，讓不知情的消費者在心理上感覺佔了便宜。

這個傳言是不是真的，很好判斷，只要定期去訪問商品頁面，記錄價格就可以。不過一般人也沒閒工夫這麼去做。於是，我們用 Python 做了一個可以定時監控商品的小工具，可以幫你監控想要關注的商品。

工具完成之後，我們隨機挑選了幾個商品作爲測試，結果就有一個中招了……（真的是隨便選的）：這款保暖背心產品，之前標價 39.9元，到11月之後卻突然調價爲 49.9元，並標註上了「雙11狂歡價」，也就是原價……

商品價格監控

實現功能

輸入天貓、蘇寧、京東、拼多多（網頁頁面 yangkeduo.com/）任一商品鏈接（不是口令）。請複製選擇好商品配置的頁面鏈接，即返回相應商品價格，並保存到文件。商品頁面若有團購與單獨購買兩個價格，返回團購價格。
使用 Windows 任務計劃或 Linux 定時任務，定時執行程序，獲取不同時段的商品價格信息。
單獨運行畫圖程序，可根據定時任務獲取的數據，生成商品價格時間變化折線圖。
程序監測的兩件商品截圖如下，具體文件在 pic 文件夾下 bnbx.html、kyy.html，推薦本地查看。簡單的商品查看頁面 https://htmlpreview.github.io/?https://raw.githubusercontent.com/spiderbeg/price_monitor/master/search/search.html 。輸入查詢商品關鍵詞，選擇商城，即可查看相應商城商品列表。默認爲蘇寧。效果圖如下。注意：點擊後請等待一段時間即可，請勿頻繁刷新。

運行環境

python3.7
Windows
jupyter notebook

運行依賴包

requests
pyecharts
beautifulsoup4

項目思路

部分問題回答

項目的大體思路流程：

第一步：使用商品詳細頁鏈接獲取商品信息與商品價格，並保存獲取數據時間、商品介紹、價格到 csv 文件中；
第二步：使用定時任務定時執行第一步完成的程序；
第三步：讀取前兩步獲取到的時間、商品介紹、價格數據，使用 pyecharts 繪製商品價格時間變化折線圖。
爲何不使用 pc 端來調試網頁，獲取價格信息？

因爲在未登錄狀態下天貓的詳細商品頁的信息是虛假的，同時從移動端網頁入手，可以降低調試難度。

谷歌瀏覽器如何開啓手機調試模式？

F12 進入開發者模式，然後鼠標點擊一下，具體見下圖，包括後文的查找價格接口信息。

實現代碼

test.py

測試商品鏈接是否能夠成功獲取到商品價格。

"""Smoke test for product links.

Loads every row of goods.csv through ``timing.get_url()`` and passes each row
to ``timing.go()``. Rows that fail are printed together with the error so the
broken link can be identified; the remaining rows are still tested.
"""
import timing

# Sample links kept for manual experiments.
# NOTE(review): timing.go() indexes its argument as a goods.csv row
# (url[1], url[2]), so these bare strings cannot be passed to it directly.
# urls = ['https://m.suning.com/product/0000000000/000000011210599174.html?utm_source=baidu&utm_midium=brand-wuxian&utm_content=&utm_campaign=title&safp=f73ee1cf.wapindex7.113464229882.4&safc=prd.1.rec_14-40_0_A_ab:A',
# 'https://m.suning.com/product/0070067092/000000000188392234.html?utm_source=baidu&utm_midium=brand-wuxian&utm_content=&utm_campaign=title&safp=f73ee1cf.wapindex7.113464229882.60&safc=prd.1.rec_5-5_1018C,1014C$c3ae37eafeb814a098d120647449da6f_H_ab:A',
# 'https://m.suning.com/product/0000000000/000000000107426461.html?src=snsxpd_none_recssxcnxhq_1-3_p_0000000000_000000000107426461_rec_21-65_3_A&safp=f73ee1cf.71jyzx.112079032536.4&safc=prd.1.rec_21-65_3_A',
# 'https://m.suning.com/product/0000000000/10606656136.html?safp=f73ee1cf.phone2019.121927933306.2&safc=prd.0.0']

# Check every link listed in goods.csv.
if __name__ == '__main__':
    for url in timing.get_url():
        try:
            timing.go(url)  # result is discarded; only success/failure matters here
        except Exception as e:
            # Exception rather than BaseException so Ctrl+C still aborts the run.
            print(url,'\n',e)

timing.py

進行定時抓取任務時，運行的文件。

# encoding:utf8
import time
import os
import re
import csv
from shop.jd import JD # 自定義
from shop.tm import TM
from shop.sn import SN
from shop.pdd import PDD
from apscheduler.schedulers.blocking import BlockingScheduler

# import logging
# formats = "%(asctime)s %(name)s %(levelname)s function:%(funcName)s -> :%(message)s"
# logging.basicConfig(format=formats, datefmt='%m/%d/%Y %I:%M:%S %p') # ,handlers=[logging.FileHandler(log_path, 'a+', 'utf-8')]
# LOGGER = logging.getLogger(__name__)
# LOGGER.setLevel(logging.INFO)

# Absolute path of the folder that holds this script.
basePath = os.path.split(os.path.abspath(__file__))[0]

def get_date():
    """Return the current local time as a ``YYYY-MM-DD HH:MM:SS`` string."""
    # Whole-second timestamp -> local struct_time -> formatted text,
    # e.g. "2016-05-05 20:28:54".
    now = time.localtime(int(time.time()))
    return time.strftime("%Y-%m-%d %H:%M:%S", now)

def get_url():
    """Read the product rows from ``goods.csv`` in the script folder.

    The first line is treated as a header and skipped; blank lines are
    ignored. An empty file yields an empty list.

    Returns:
        list[list[str]]: one list of column values per CSV row. According to
        the original description the columns are chart name, product name
        and product link.
    """
    goods_file = os.path.join(basePath, 'goods.csv')
    # newline='' lets the csv module do its own line-ending handling.
    with open(goods_file, 'r', encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; the default avoids StopIteration on an empty file
        return [row for row in reader if row]

def go(url):
    """Fetch the title and price for one goods.csv row.

    Args:
        url: a row as returned by ``get_url()``. ``url[1]`` becomes the base
            name of the data file and ``url[2]`` is the product page link.
            The row is not modified.

    Returns:
        tuple: ``((time, title, price), path)`` where ``path`` is the CSV
        file under ``data/`` that the record belongs to.

    Raises:
        TypeError: the link contains no ``://<host>.com`` part, or the host
            matches none of the supported shops.

    Which price is reported when a page shows several is decided by the shop
    classes (not visible here); the console message states the group-buy
    price.
    """
    link = url[2]
    hosts = re.findall(r'://(.+?)\.com', link)  # dot escaped: only a literal ".com" ends the host
    if not hosts:
        raise TypeError('請檢查輸入是否爲目標網站的商品詳細頁面連接')
    result = hosts[0]

    # Host keyword(s) -> scraper class, checked in this order.
    shops = (
        (('yangkeduo',), PDD),
        (('suning',), SN),
        (('tmall', 'taobao'), TM),
        (('jd',), JD),
    )
    for keywords, shop in shops:
        if any(keyword in result for keyword in keywords):
            title, price = shop(link).main()
            break
    else:
        raise TypeError('請檢查輸入的網站連接')
    print('%s 標題 %s, 價格（多個價格以團購爲準） %s. '%(result,title,price))

    # Build the data file name on a copy so the caller's row stays untouched.
    # NOTE(review): the last entry is a two-backslash string, so a single
    # backslash in a name is not replaced. draw.py uses the same list to find
    # the file, so any change must be made in both places.
    name = url[1]
    for rs in ['.',' ',r'/',r'\\']:
        name = name.replace(rs,'_')
    path = os.path.join(basePath, 'data', name+'.csv')

    today = get_date()
    return (today, title, price),path

def addData(row, path):
    """Append one price record to the CSV file at ``path``.

    Args:
        row: sequence ``(time, title, price)``.
        path: target CSV file. The file, and its parent folder if missing,
            is created on first use, and the header row is written once.
    """
    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)  # the data folder may not exist on a first run
    # newline='' stops the csv writer from producing blank rows on Windows.
    with open(path,'a+',encoding='utf8',newline='') as f:
        fieldnames = ['時間', '標題','價格']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if f.tell() == 0:  # nothing written yet, so emit the header first
            writer.writeheader()
        writer.writerow({'時間': row[0], '標題': row[1],'價格':row[2]})

def main():
    """Fetch every product listed in goods.csv and append its price record.

    A failure on one product is printed and the remaining products are still
    processed.
    """
    for url in get_url():
        try:
            row, path = go(url)  # ((time, title, price), data-file path)
            addData(row, path)
        except Exception as e:
            # Exception rather than BaseException: Ctrl+C and SystemExit
            # must still be able to stop the run.
            print('請求問題？報錯：%s'%e)


if __name__ == '__main__':
    # One pass over goods.csv, stamped with the start time.
    started = get_date()
    print('時間', started)
    main()

    # Disabled alternative: in-process scheduling with APScheduler.
    # NOTE(review): `url` below is not defined in this scope.
    # scheduler = BlockingScheduler()
    # scheduler.add_job(go,'cron', args=[url],hour='8-23', minute= '5,35' , second='15')
    # # scheduler.add_job(main,'cron', args=[3088512],hour='8-23', minute= 5 , second='15')
    # print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
    #
    # try:
    #     scheduler.start()
    # except (KeyboardInterrupt, SystemExit):
    #     pass

draw.py

圖像文件生成在 pic 文件夾中。

# encoding: utf8

from pyecharts import options as opts
from pyecharts.charts import Page, Line
import os
import csv

# Absolute path of the folder that holds this script.
basePath = os.path.split(os.path.abspath(__file__))[0]

def line(title,checktime,price) -> Line:
    """Build a smoothed line chart of one product's price over time.

    Args:
        title: series name; also used as the x-axis name.
        checktime: x-axis values (the recorded times).
        price: y-axis values (the recorded prices).

    Returns:
        The configured ``Line`` chart.
    """
    chart = Line()
    chart = chart.add_xaxis(checktime)
    chart = chart.add_yaxis(title, price, is_smooth=True)
    # Axis labels show the raw value, rotated by 30.
    label_opts = opts.LabelOpts(formatter="{value}", font_size=12, rotate=30,)
    chart = chart.set_global_opts(
        title_opts=opts.TitleOpts(title="商品價格"),
        yaxis_opts=opts.AxisOpts(name="元/臺"),
        xaxis_opts=opts.AxisOpts(name=title, axislabel_opts=label_opts),
    )
    return chart

def files():
    """Group the product names listed in goods.csv by chart name.

    The header line is skipped and blank lines are ignored; an empty file
    yields an empty dict.

    Returns:
        dict[str, list[str]]: column 0 of each row mapped to the list of
        sanitised column 1 values. Each key becomes one HTML page and each
        value one data file name in ``draw()``.
    """
    groups = {}
    # newline='' lets the csv module do its own line-ending handling.
    with open(os.path.join(basePath,'goods.csv'),'r',encoding='utf8',newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; the default avoids StopIteration on an empty file
        for row in reader:
            if not row:
                continue
            # Same character replacement that timing.py uses when naming data files.
            # NOTE(review): the last entry is a two-backslash string, so a
            # single backslash is left as is; keep both scripts in sync.
            name = row[1]
            for rs in ['.',' ',r'/',r'\\']:
                name = name.replace(rs,'_')
            groups.setdefault(row[0],[]).append(name)
    return groups

def draw(files):
    """Render one HTML page per chart group into the ``pic`` folder.

    Args:
        files: mapping of page name to a list of data file base names, as
            returned by ``files()``. Each name is read from
            ``data/<name>.csv`` and added to the page as one line chart.

    A product whose data cannot be read or charted is reported and skipped;
    the rest of the page is still rendered.
    """
    datapath = os.path.join(basePath,'data')
    picpath = os.path.join(basePath,'pic')
    os.makedirs(picpath, exist_ok=True)  # in case the output folder does not exist yet
    for k, names in files.items():
        page = Page()
        for n in names:
            try:
                with open(os.path.join(datapath, n +'.csv'),'r', encoding='utf8', newline='') as f:
                    price,checktime = [],[]
                    for row in csv.DictReader(f):
                        checktime.append(row['時間'])
                        price.append(row['價格'])
                page.add(line(n,checktime,price))
            except Exception as e:
                # Report the cause instead of hiding it; Ctrl+C is no longer swallowed.
                print('未製圖：',n,e)
        page.render(os.path.join(picpath, k +'.html'))


if __name__ == '__main__':
    # Group the products listed in goods.csv, then render one HTML page per group.
    draw(files())

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。