對一個 20667 行的 xlsx 文件進行遍歷測試(Python)
import pandas as pd


def print_execute_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    Args:
        func: The function or method to be timed.

    Returns:
        A wrapper that calls ``func`` with the same arguments, prints
        ``<name>() execute time: <seconds>s`` and returns whatever
        ``func`` returned.
    """
    from functools import wraps
    from time import perf_counter

    @wraps(func)  # keep the decorated function's __name__ and docstring
    def wrapper(*args, **kwargs):
        # perf_counter() is a high-resolution clock meant for measuring
        # short intervals; time() can be too coarse for sub-millisecond runs.
        start = perf_counter()
        func_return = func(*args, **kwargs)
        end = perf_counter()
        print(f'{func.__name__}() execute time: {end - start}s')
        return func_return

    return wrapper


file_path = r"D:\git\xxxx\dev\pd-xxx1.2\合併.xlsx"
data = pd.read_excel(file_path, sheet_name="xxxx", engine='openpyxl')
# Null handling: keep non-null cells and substitute None elsewhere.
df = data.where(data.notnull(), None)


@print_execute_time
def iterrows():
    """Time one pass over ``df`` using ``DataFrame.iterrows()``.

    Each step yields an ``(index label, row Series)`` pair.
    """
    for label, row in df.iterrows():
        pass


@print_execute_time
def itertuples():
    """Time one pass over ``df`` using ``DataFrame.itertuples()``.

    Each step yields one row as a namedtuple; fields are read by
    attribute or position, not by ``row[...]`` string lookup.
    """
    for row in df.itertuples():
        pass


@print_execute_time
def iteritems():
    """Time one pass over the columns of ``df``.

    Uses ``DataFrame.items()``: ``DataFrame.iteritems()`` was deprecated in
    pandas 1.5 and removed in pandas 2.0. Each step yields a
    ``(column label, column Series)`` pair, so this loop runs once per
    column rather than once per row.
    """
    for name, column in df.items():
        pass


@print_execute_time
def index():
    """Time one pass over the index labels of ``df``.

    Only the labels are visited; no row data is fetched inside the loop.
    """
    for i in df.index:
        pass


if __name__ == '__main__':
    print('begining ...')
    # The benchmarks report their timing themselves and return None, so
    # they are simply called instead of having their results printed.
    iterrows()
    itertuples()
    iteritems()
    index()
    print('Done !')
begining ...
iterrows() execute time: 2.003657817840576s
itertuples() execute time: 0.04618692398071289s
iteritems() execute time: 0.0009987354278564453s
index() execute time: 0.0029909610748291016s
Done !

iterrows() execute time: 2.2464449405670166s
itertuples() execute time: 0.08178043365478516s
iteritems() execute time: 0.000997781753540039s
index() execute time: 0.0059833526611328125s
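所以從效率上考慮,優先採用 iteritems 或 index 來進行遍歷數據。需要注意的是:iteritems(pandas 2.0 起已移除,改用 items)是按「列」遍歷,每次返回(列名, 該列的 Series),循環次數等於列數而不是行數;index 只遍歷索引標籤,循環內還需要用 df.at[i, 列名] 之類的方式取值才能拿到數據。因此這兩者的耗時與 iterrows、itertuples 並不能直接比較;若確實需要逐行讀取數據,itertuples 明顯快於 iterrows。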