pandas模塊是數據分析的大殺器,它使得文件相關的操作變得簡單。
看一下它的簡單使用:
import pandas as pd

# Read: load the CSV file into a DataFrame.
df = pd.read_csv('all_forum_info.csv')
# DataFrame.info() prints its summary itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None".
df.info()

# Write: dump the DataFrame back out as CSV.
df.to_csv('data.csv')
具體用法參照我的博客 pandas 系列:http://www.javashuo.com/article/p-cpbmpkek-bm.html
import csv

# Read: csv.reader yields every row of the file as a list of strings.
# newline='' is what the csv module documentation asks for, so that the
# module itself handles line endings (including newlines inside quoted fields).
with open('all_forum_info.csv', encoding='utf-8', newline='') as myFile:
    lines = csv.reader(myFile)
    for line in lines:
        print(line)

# Output:
# ['hgroup', 'forum_topic', 'forum_url', 'count_topic', 'count_post']
# ['Welcome to Breastcancer.org', 'Info & Resources for New Patients & Members - Please No Posts!', 'https://community.breastcancer.org/forum/131', '11', '11']
# ['Welcome to Breastcancer.org', 'Acknowledging and honoring our Community', 'https://community.breastcancer.org/forum/135', '111', '5,372']
# ['Not Diagnosed but Concerned', 'Not Diagnosed But Worried', 'https://community.breastcancer.org/forum/83', '16,638', '133,217']
# ['Not Diagnosed but Concerned', 'Waiting for Test Results', 'https://community.breastcancer.org/forum/62', '6,712', '62,439']
# ['Not Diagnosed but Concerned', 'Benign Breast Conditions', 'https://community.breastcancer.org/forum/148', '795', '4,094']
# ['Not Diagnosed but Concerned', 'High Risk for Breast Cancer', 'https://community.breastcancer.org/forum/47', '2,146', '23,435']
# ['Tests, Treatments & Side Effects', 'Just Diagnosed', 'https://community.breastcancer.org/forum/5', '6,894', '103,851']
# ['Tests, Treatments & Side Effects', 'Diagnosed and Waiting for Test Results', 'https://community.breastcancer.org/forum/147', '592', '6,442']

# Write:
# 'w' selects write mode. open() opens 'data.csv' in the current directory,
# creating it if it does not exist, and returns the file object myFile.
# csv.writer(myFile) returns the writer object myWriter.
# writerow() writes a single row; writerows() writes several rows at once.
# Note: if 'data.csv' already exists, opening it in 'w' mode empties it
# before any writerow()/writerows() call runs.
# newline='' stops the platform from translating the writer's own "\r\n"
# row terminator, which would otherwise produce blank rows on Windows.
with open('data.csv', 'w', encoding='utf-8', newline='') as myFile:
    myWriter = csv.writer(myFile)
    myWriter.writerow(['title', '數學'])
    myWriter.writerow([8, 'h'])
    myList = [[1, 2, 3], [4, 5, 6]]
    myWriter.writerows(myList)
data.csv 的內容:
title,數學
8,h
1,2,3
4,5,6
# Register a custom dialect named 'mydialect': fields are separated by '|'
# and QUOTE_ALL tells writer objects to quote every field.
csv.register_dialect('mydialect', delimiter='|', quoting=csv.QUOTE_ALL)

# newline='' lets the csv module handle line endings itself, as its
# documentation requires for files passed to csv.reader.
with open('test.csv', 'r', newline='') as myFile:
    lines = csv.reader(myFile, 'mydialect')
    # lines.line_num reports how many source lines have been read so far:
    # print(lines.line_num)
    for line in lines:
        print(line)

# Output:
# ['1', '2', '3']
# ['2', '3', '4']
import xlrd

# NOTE(review): xlrd 2.0 and later only read legacy .xls workbooks; opening
# an .xlsx file as done here needs xlrd < 2.0 (or a switch to openpyxl).
# Confirm the installed version before relying on this snippet.
myWorkbook = xlrd.open_workbook('data.xlsx')

# Three ways to obtain a worksheet.
# Option 1: fetch the list of worksheets and pick one by position.
mySheets = myWorkbook.sheets()
mySheet = mySheets[0]
# Option 2: fetch directly by index.
# mySheet1 = myWorkbook.sheet_by_index(0)
# Option 3: fetch by sheet name.
# mySheet2 = myWorkbook.sheet_by_name('table')

# Number of rows and columns in the sheet.
nrows = mySheet.nrows
ncols = mySheet.ncols
print(nrows, ncols)

# One whole row and one whole column; the argument is the zero-based
# row/column index and each call returns a list.
myRowValues = mySheet.row_values(0)
myColValues = mySheet.col_values(0)
print(myRowValues, myColValues)

# Reading a single cell. Both indices (row, column) are zero-based.
# Via the cell object:
# myCell = mySheet.cell(0, 0)
# myCellValue = myCell.value
# Or directly:
myCellValue = mySheet.cell_value(0, 0)
print(myCellValue)
# Write
import xlwt

# Create an Excel workbook.
myWorkbook = xlwt.Workbook()
# Add a worksheet to it.
mySheet = myWorkbook.add_sheet('A Test Sheet')

# Write data. myStyle carries the font settings and the number format.
myStyle = xlwt.easyxf('font: name Times New Roman, color-index red, bold on', num_format_str='#,##0.00')
mySheet.write(1, 1, 1234.56, myStyle)
mySheet.write(2, 0, 1)  # A3 = 1
mySheet.write(2, 1, 1)  # B3 = 1
mySheet.write(2, 2, xlwt.Formula("A3+B3"))  # C3 = A3 + B3, i.e. 2

# Save. xlwt produces the legacy binary .xls format, so the file is named
# with the .xls extension; an .xlsx name would mislabel the content and
# Excel rejects such a file.
myWorkbook.save('excelFile.xls')

# Modify an existing workbook: xlrd opens it read-only, xlutils.copy turns
# it into a writable xlwt workbook, which is then saved under a new name.
import xlrd
from xlutils.copy import copy

workbook = xlrd.open_workbook('excelFile.xls')
# NOTE(review): pass formatting_info=True to open_workbook() if the cell
# styles of the source file should survive the copy — confirm the need.
workbooknew = copy(workbook)
ws = workbooknew.get_sheet(0)
ws.write(3, 0, 'changed!')
workbooknew.save('excelFilecopy.xls')
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.writer.excel import ExcelWriter

# Open an existing workbook.
workbook_ = load_workbook("data.xlsx")

# Worksheet names. The `sheetnames` property replaces the deprecated
# get_sheet_names() method.
sheetnames = workbook_.sheetnames
print(sheetnames)

# Look a worksheet up by name. Indexing the workbook replaces the
# deprecated get_sheet_by_name() method.
sheet = workbook_[sheetnames[0]]
print(sheet.cell(row=3, column=3).value)

# Change one cell and save the result under a new file name.
sheet['A1'] = '47'
workbook_.save("data_new.xlsx")

# Create a brand-new workbook, fill one cell of its active sheet and save it.
wb = Workbook()
ws = wb.active
ws['A1'] = 4
wb.save("data_new2.xlsx")
import xlsxwriter


def get_chart(series):
    """Build a 700x350 line chart containing one line per entry of *series*.

    The chart is created on the module-level ``workbook`` object, which must
    therefore be assigned before this function is called.

    Args:
        series: iterable of dicts, each with a "name" key (series label) and
            a "values" key (cell range holding the data).

    Returns:
        The chart object returned by ``workbook.add_chart``.
    """
    chart = workbook.add_chart({'type': 'line'})
    for ses in series:
        name = ses["name"]
        values = ses["values"]
        # NOTE(review): xlsxwriter documents ranges in the form
        # '=Sheet1!$A$2:$A$10'; these bare ranges carry no sheet name —
        # confirm the chart plots the intended cells.
        chart.add_series({
            'name': name,
            'categories': 'A2:A10',
            'values': values,
        })
    chart.set_size({'width': 700, 'height': 350})
    return chart


if __name__ == '__main__':
    workbook = xlsxwriter.Workbook('H5應用中心關鍵數據及趨勢.xlsx')
    worksheet = workbook.add_worksheet("每日PV,UV")

    # Header row.
    headings = ['日期', '平均值']
    worksheet.write_row('A1', headings)

    # Fill rows 2-10 of columns A and B with a running counter as sample data.
    index = 0
    for row in range(1, 10):
        for com in [0, 1]:
            worksheet.write(row, com, index)
            index += 1

    # Chart column B and place the chart with its top-left corner at H7.
    series = [{"name": "平均值", "values": "B2:B10"}]
    chart = get_chart(series)
    chart.set_title({'name': '每日頁面分享數據'})
    worksheet.insert_chart('H7', chart)

    # The file is only written to disk when the workbook is closed.
    workbook.close()