Recently my boss asked me to pull some data from a website. Doing it by hand was too slow, so I found some Python snippets online and put together a script.
```python
import re

import requests
import xlrd
from bs4 import BeautifulSoup
from xlutils.copy import copy


def read_excel(path):
    # Open the workbook and take the first sheet (sheet indexes start at 0)
    workbook = xlrd.open_workbook(path)
    sheet1 = workbook.sheet_by_index(0)

    for i in range(sheet1.nrows):
        # The first column holds the URL; strip stray quotes
        url = sheet1.cell_value(i, 0).replace('\'', '')
        print(url, i)
        response = get_responseHtml(url)
        soup = get_beautifulSoup(response)
        pattern1 = '^https://ews-aln-core.cisco.com/applmgmt/view-appl/+[0-9]*$'
        pattern2 = '^https://ews-aln-core.cisco.com/applmgmt/view-endpoint/+[0-9]*$'
        pattern3 = '^https://ews-aln-core.cisco.com/applmgmt/view-appl/by-name/'
        if pattern_match(url, pattern1) or pattern_match(url, pattern3):
            priority = soup.find("table", class_="main_table_layout") \
                .find("tr", class_="centered sub_section_header") \
                .find_next("tr", align="center").find_all("td")
        elif pattern_match(url, pattern2):
            priority = soup.find("table", class_="main_table_layout") \
                .find("tr", class_="centered") \
                .find_next("tr", align="center").find_all("td")
        else:
            print("no pattern: " + url)
            continue
        try:
            priorityNumber = 'P' + get_last_td(priority)
        except Exception:
            print("not found: " + url)
            continue
        write_excel(path, i, 1, priorityNumber)


def write_excel(path, row, col, value):
    # xlwt can only save .xls; copy the existing workbook, write, save back
    oldwb = xlrd.open_workbook(path)
    wb = copy(oldwb)  # copy() wants a workbook object, not a file name
    ws = wb.get_sheet(0)
    ws.write(row, col, value)
    wb.save(path)


def get_last_td(result):
    # Text of the last <td> in the matched row
    return result[-1].contents[0]


def get_beautifulSoup(request):
    return BeautifulSoup(request, 'html.parser', from_encoding='utf-8')


def get_responseHtml(url):
    # Browser-like User-Agent so the server does not drop the connection
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/56.0.2924.87 Safari/537.36'}
    return requests.get(url, auth=(userName, passWord), headers=headers).content


def pattern_match(s, pattern, flags=0):
    return re.match(pattern, s, flags)


if __name__ == '__main__':
    userName = '*'
    passWord = '*'
    path = r'*'
    read_excel(path)
```
There were quite a few pitfalls along the way:
1. At first the file was in xlsx format and could not be opened after saving; changing the Excel format to xls fixed it (xlwt only writes the old .xls format).
2. The header came from the web; with it the request is not treated as a crawler, which otherwise fails with: http.client.RemoteDisconnected: Remote end closed connection without response.
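The same header trick can be sketched with only the standard library's urllib, without sending anything over the network (the URL below is a placeholder, not the real internal page):

```python
import urllib.request

# Hypothetical URL standing in for the internal page the script targets
url = 'https://example.com/'

# Attach a browser-like User-Agent so the server does not reject
# the request as coming from a script
req = urllib.request.Request(url, headers={
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/56.0.2924.87 Safari/537.36'})

# The header is stored on the request object before it is sent;
# urllib normalizes the key to 'User-agent'
print(req.get_header('User-agent'))
```

Passing the same dict to `requests.get(url, headers=...)` has the same effect in the script above.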
3. The argument to copy must be a workbook object, not the xls file name, otherwise it raises: AttributeError: 'str' object has no attribute 'datemode'.
4. Found a very good blog post: appending data to an existing Excel xls file in Python, i.e. opening the excel file and writing new data into it.
5. At first I wanted to save into a new file, using a new path, and found it did not work: each pass of the for loop copies from the source excel again, so in the end only one row had actually been inserted.
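The underlying mistake here is independent of Excel: re-copying from the pristine source inside the loop discards every earlier write. A toy reproduction with plain dicts (the keys and values are made up for illustration):

```python
# Source data standing in for the original .xls file
source = {'a': 1, 'b': 2}

# Buggy pattern: copy from the source on every iteration, so each
# write starts from a fresh copy and the previous writes are lost
for i, key in enumerate(['x', 'y', 'z']):
    dest = dict(source)  # fresh copy every time -> earlier writes gone
    dest[key] = i

print(sorted(dest))      # only the last write survived: ['a', 'b', 'z']

# Fixed pattern: copy once, write inside the loop, "save" once at the end
dest = dict(source)
for i, key in enumerate(['x', 'y', 'z']):
    dest[key] = i

print(sorted(dest))      # all writes kept: ['a', 'b', 'x', 'y', 'z']
```

The script above sidesteps the problem by saving back into the source path, so each fresh copy already contains the earlier rows; copying once outside the loop and saving once at the end would also allow writing to a new path.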
6. Regular expression syntax references: "Regular Expressions - Syntax" and "Python Regular Expressions".
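As a quick check of how the script's patterns behave, here is `re.match` applied to two of them with a made-up ID in the URL:

```python
import re

# Two of the URL patterns used in the script above
pattern1 = '^https://ews-aln-core.cisco.com/applmgmt/view-appl/+[0-9]*$'
pattern2 = '^https://ews-aln-core.cisco.com/applmgmt/view-endpoint/+[0-9]*$'

# re.match anchors at the start of the string; '^...$' pins both ends,
# '/+' means one or more slashes, '[0-9]*' any run of digits
m1 = re.match(pattern1, 'https://ews-aln-core.cisco.com/applmgmt/view-appl/12345')
m2 = re.match(pattern2, 'https://ews-aln-core.cisco.com/applmgmt/view-appl/12345')

print(bool(m1))  # True  - URL fits the view-appl pattern
print(bool(m2))  # False - path segment is view-appl, not view-endpoint
```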
7. Usage of Beautiful Soup in Python, a very complete reference: the Beautiful Soup 4.2.0 documentation.
8. A novel-scraping demo: Python3 Web Crawler (7): Scraping a Novel with Beautiful Soup.
9. I had never written Python before; this first attempt took half a day, and there is still plenty of room for improvement.