# -*- coding: utf-8 -*-
"""Taobao shop scraper.

Logs into Taobao through an Internet Explorer selenium session (manual
login — the script sleeps 20 s so a human can complete it), then walks
every shop listed in ``1.csv`` (columns: brand, ?, shop-search URL),
paginating through each shop's search results.  New items are inserted
into the ``商品id`` MySQL table; for items shelved exactly 1/7/30 days
ago the matching sales column ('1天銷量'/'7天銷量'/'30天銷量') is updated.
A full-page screenshot is captured for items that have no ratings yet.

NOTE(review): the schema/credentials (db '7.25', root/123) are taken
verbatim from the original script — confirm before deployment.
"""
from selenium import webdriver
from scrapy.selector import Selector
import time
import random
import pymysql
from urllib import parse  # kept from original; may be used by other tooling
import re
import os


class spider(object):
    """Small helper for extracting the 'next page' link of a shop page."""

    def chul3(self, dates):
        """Return the absolute URL of the next results page.

        dates: raw HTML of a shop search-result page.  Returns
        'https:' + href, or just 'https:' when no link is found
        (extract_first falls back to "").
        """
        sel = Selector(text=dates)
        next_url = sel.xpath(
            '//*[@id="J_ShopSearchResult"]/div/div[2]/div[10]/a[11]/@href'
        ).extract_first("")
        return 'https:' + next_url


chuli = spider()

conection = pymysql.connect(
    host='localhost',
    user='root',
    password='123',
    db='7.25',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)

with conection.cursor() as cursor:
    # All item ids already known to the database (so we only INSERT new ones).
    cursor.execute("select * from 商品id")
    shop_oldid = [row['id'] for row in cursor.fetchall()]

    # Items whose shelf date (上架時間) is exactly 1, 7 or 30 days ago,
    # tagged with the sales column that must be refreshed today.
    # (Whitespace restored: the keywords must be separated from the
    # unquoted Chinese identifiers or MySQL lexes them as one token.)
    sql_days = '''
        SELECT `商品id`.id, `上架時間`, '1天銷量' as 日期
        FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架時間`, CURDATE()) = 1
        union
        SELECT `商品id`.id, `上架時間`, '7天銷量' as 日期
        FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架時間`, CURDATE()) = 7
        union
        SELECT `商品id`.id, `上架時間`, '30天銷量' as 日期
        FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架時間`, CURDATE()) = 30
    '''
    cursor.execute(sql_days)
    shop_olxx = list(cursor.fetchall())
conection.commit()

# Long-lived cursor reused by lll() below.
cursor = conection.cursor()

# NOTE: the instance is deliberately NOT named `webdriver` — the original
# shadowed the imported module with the driver object.
driver = webdriver.Ie()
url = 'https://login.taobao.com/member/login.jhtml?spm=a21bo.50862.754894437.1.5dcec6f76Oq9Wh&f=top&redirectURL=https%3A%2F%2Fwww.taobao.com%2F%3Fspm%3Da1z10.1-c-s.1581860521.1.559a715a3EnsHq'
driver.get(url)
time.sleep(20)  # time window for a human to complete the Taobao login

path = os.getcwd()


def capture(webder, save_fn="capture.png"):
    """Scroll the page to the bottom (so lazy images load), then screenshot.

    webder: a selenium WebDriver with the target page already loaded.
    save_fn: destination image path.
    The injected JS appends "scroll-done" to document.title when the
    scroll animation finishes; we poll for that marker up to 30 s.
    """
    webder.execute_script("""
        (function () {
            var y = 0;
            var step = 100;
            window.scroll(0, 0);
            function f() {
                if (y < document.body.scrollHeight) {
                    y += step;
                    window.scroll(0, y);
                    setTimeout(f, 50);
                } else {
                    window.scroll(0, 0);
                    document.title += "scroll-done";
                }
            }
            setTimeout(f, 1000);
        })();
    """)
    for _ in range(30):
        if "scroll-done" in webder.title:
            break
        time.sleep(1)
    webder.save_screenshot(save_fn)


def lll(url):
    """Scrape one shop's search-result pages, following pagination.

    Relies on module globals: `driver`, `cursor`, `conection`,
    `shop_oldid`, `shop_olxx`, `path`, and `shop` (the current CSV row,
    set by the main loop below — its first field is the brand name).

    Rewritten as a loop: the original recursed once per results page,
    which can exhaust the recursion limit on large shops.
    """
    while url:
        driver.implicitly_wait(200)
        driver.get(url)
        # Explicit wait: the listing is ready once the pager exists.
        driver.find_element_by_class_name('pagination')
        page = driver.page_source
        time.sleep(random.randrange(2, 6))  # polite random delay
        selects = Selector(text=page)

        for item in selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
            # re_first (not .re) — the original bound a *list*, which pymysql
            # escapes as a parenthesised tuple and which crashes when empty.
            bd_pig = item.xpath("./dt/a/img/@src").re_first(r'(.*)_', '')
            # Keep only CJK characters of the title.
            bd_name = ''.join(re.findall(
                r'[\u4e00-\u9fa5]',
                item.xpath('./dd[1]/a/text()').extract_first('')))
            # Item id = all digits of the detail-page href.
            bd_id = ''.join(re.findall(
                r'\d', item.xpath('./dd[1]/a/@href').extract_first('')))
            bd_much = item.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')
            bd_idlian = 'http://item.taobao.com/item.htm?id=' + bd_id
            bd_liang = item.xpath(
                './dd[1]/div/div[last()]/span[last()]/text()').extract_first('')

            if bd_id not in shop_oldid:
                sql = ("INSERT INTO 商品id (`品牌`, `id`,圖片連接,價格,標題,商品地址) "
                       "VALUES (%s,%s,%s,%s,%s,%s)")
                cursor.execute(sql, (shop.split(",")[0], bd_id, bd_pig,
                                     bd_much, bd_name, bd_idlian))
                conection.commit()

                # Visit the item detail page for category / SKU / shelf date.
                driver.implicitly_wait(200)
                driver.get('http://item.taobao.com/item.htm?id=' + bd_id)
                driver.find_element_by_class_name('tb-price-spec')  # wait
                time.sleep(random.randrange(2, 6))
                detail = Selector(text=driver.page_source)

                liem = detail.xpath('//*[@id="J_TMySize"]/@data-value').extract_first("")
                pinjia = detail.xpath('//*[@id="J_RateCounter"]/text()').extract_first("")

                # `pinjia` may be "" when the counter is missing — the
                # original int("") raised ValueError; treat missing as 0.
                if int(pinjia or 0) == 0:
                    # No ratings yet: dig the shelf timestamp (dbst) out of
                    # the big inline config <script> and record it.
                    scripts = detail.xpath('//script').extract()
                    big = [s for s in scripts if len(str(s)) > 1000]
                    new_time = re.findall(
                        r".*dbst:(.\d*)", str(big[0]).replace(" ", ""))[0][0:10]
                    timeTuple = time.strftime(
                        "%Y-%m-%d", time.localtime(int(new_time)))
                    sql = ('update `商品id` set `商品id`.`類目` = %s, '
                           '`商品id`.`上架時間` = %s where id = %s')
                    cursor.execute(sql, (liem, timeTuple, bd_id))
                    conection.commit()
                    # Screenshot under  <cwd>\<brand>\<sanitised title+id>.jpg ;
                    # make sure the brand folder exists first.
                    brand_dir = path + '\\' + shop.split(",")[0]
                    os.makedirs(brand_dir, exist_ok=True)
                    title = brand_dir + '\\' + re.sub(r"\W", "", driver.title + bd_id)
                    capture(driver, title + '.jpg')
                else:
                    sql = 'update `商品id` set `商品id`.`類目` = %s where id = %s'
                    cursor.execute(sql, (liem, bd_id))
                    conection.commit()

                # SKU dimensions: sizes (尺碼) x colours (顏色).
                colour_count = 1
                size_count = 1
                for sku in detail.xpath('//*[@id="J_isku"]/div/dl'):
                    label = sku.xpath('./dt/text()').extract_first("")
                    if '尺碼' in label:
                        sizes = sku.xpath('./dd/ul/li/a/span/text()').extract()
                        size_count = len(sizes)
                        sql = 'update `商品id` set `商品id`.`尺碼` = %s where id = %s'
                        cursor.execute(sql, (' '.join(sizes), bd_id))
                        conection.commit()
                    if '顏色' in label:
                        colours = sku.xpath('./dd/ul/li/a/span/text()').extract()
                        colour_count = len(colours)
                        sql = 'update `商品id` set `商品id`.`顏色` = %s where id = %s'
                        cursor.execute(sql, (' '.join(colours), bd_id))
                        conection.commit()
                sql = 'update `商品id` set `商品id`.`sku量` = %s where id = %s'
                cursor.execute(sql, (colour_count * size_count, bd_id))
                conection.commit()

            # Refresh today's 1/7/30-day sales column for this item.
            # The column name is interpolated, but it can only be one of the
            # three literals selected in sql_days above — not user input.
            for row in shop_olxx:
                if row['id'] == bd_id:
                    sql = ("UPDATE 商品id set " + row['日期'] +
                           " = (%s) where id = %s")
                    cursor.execute(sql, (bd_liang, row['id']))
                    conection.commit()

        # Follow the pager's last link, if any (loop instead of recursion).
        next_href = selects.xpath(
            '//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href'
        ).extract_first("")
        url = ('https:' + next_href) if next_href else None


with open(os.getcwd() + r'\1.csv', 'r') as c:
    for shop in c.readlines():
        # shop is "brand,?,url,..." — lll() also reads the global `shop`
        # for the brand name, so it must stay bound during the call.
        url = shop.split(",")[2]
        lll(url)