pandas

一、讀寫 CSV 文件

import csv
import sys

# Copy a CSV file row by row, echoing each parsed row to stdout.
# Usage: script.py <input.csv> <output.csv>
input_file = sys.argv[1]
output_file = sys.argv[2]

# newline='' hands line-ending handling to the csv module. Without it,
# quoted fields containing newlines are misread and the writer emits an
# extra blank line after every row on Windows.
with open(input_file, 'r', newline='') as csv_in_file:
    with open(output_file, 'w', newline='') as csv_out_file:
        filereader = csv.reader(csv_in_file, delimiter=',')
        filewriter = csv.writer(csv_out_file, delimiter=',')
        for row_list in filereader:
            print(row_list)
            filewriter.writerow(row_list)

二、篩選特定行

2.1 行中的值滿足某個條件

基礎 Python 版:

import csv
import sys

# Keep only the rows whose supplier (column 0) is exactly 'Supplier X' or
# whose cost (column 3) is greater than 600.0; the header row is always copied.
# Usage: script.py <input.csv> <output.csv>
input_file = sys.argv[1]
output_file = sys.argv[2]

# Plain 'r' replaces the legacy 'rU' mode (removed in Python 3.11), and
# newline='' lets the csv module handle line endings itself.
with open(input_file, 'r', newline='') as csv_in_file:
    with open(output_file, 'w', newline='') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)

        # A default of None avoids StopIteration on an empty input file.
        header = next(filereader, None)
        if header is not None:
            filewriter.writerow(header)

        for row_list in filereader:
            print(row_list)
            supplier = str(row_list[0]).strip()
            # Normalise values such as " $1,234.50 " to "1234.50" so that
            # float() can parse them.
            cost = str(row_list[3]).strip().strip('$').replace(',', '')
            if supplier == 'Supplier X' or float(cost) > 600.0:
                filewriter.writerow(row_list)

pandas 版(主要用到了 loc() 函數):

import pandas as pd 
import sys

# pandas counterpart of the basic row filter: keep rows whose supplier name
# contains 'X' or whose cost is greater than 600.0.
# Usage: script.py <input.csv> <output.csv>
input_file = sys.argv[1]
output_file = sys.argv[2]

data_frame = pd.read_csv(input_file)

# Strip the currency symbol and the thousands separators before converting,
# mirroring the basic version; "$1,000.00" would otherwise fail astype(float).
data_frame['Cost'] = (
    data_frame['Cost']
    .str.strip('$')
    .str.replace(',', '', regex=False)
    .astype(float)
)

# '|' (element-wise OR) matches the `or` used by the basic version; the
# previous '&' required both conditions and produced a different result.
# na=False treats a missing supplier name as "no match" rather than NaN.
supplier_matches = data_frame['Supplier Name'].str.contains('X', na=False)
cost_above_limit = data_frame['Cost'] > 600.0
data_frame_value_meets_condition = data_frame.loc[supplier_matches | cost_above_limit, :]
data_frame_value_meets_condition.to_csv(output_file, index=False)

2.2 行中的值屬於某個集合

基礎 Python 版:

pandas 版:

2.3 行中的值匹配於某個正則表達式/模式

基礎 Python 版:

import csv
import sys
import re

# Keep only the rows whose invoice number (column 1) matches the pattern
# below, i.e. starts with "001-"; the header row is always copied.
# Usage: script.py <input.csv> <output.csv>
input_file = sys.argv[1]
output_file = sys.argv[2]

# Compiled once, outside the loop. '^' anchors the match at the start of
# the field, so search() only succeeds on values beginning with "001-".
pattern = re.compile(r'(?P<my_pattern_group>^001-.*)', re.I)

# Plain 'r' replaces the legacy 'rU' mode (removed in Python 3.11), and
# newline='' lets the csv module handle line endings itself.
with open(input_file, 'r', newline='') as csv_in_file:
    with open(output_file, 'w', newline='') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)

        # A default of None avoids StopIteration on an empty input file.
        header = next(filereader, None)
        if header is not None:
            filewriter.writerow(header)

        for row_list in filereader:
            invoice_number = row_list[1]
            if pattern.search(invoice_number):
                filewriter.writerow(row_list)

pandas版:

import pandas as pd 
import sys

# pandas counterpart of the pattern filter: keep rows whose 'Invoice Number'
# starts with "001-".
# Usage: script.py <input.csv> <output.csv>
input_file = sys.argv[1]
output_file = sys.argv[2]

data_frame = pd.read_csv(input_file)

# na=False makes rows with a missing invoice number evaluate to False.
# Without it the mask contains NaN and .loc refuses to index with it.
invoice_matches = data_frame['Invoice Number'].str.startswith("001-", na=False)
data_frame_value_matches_pattern = data_frame.loc[invoice_matches, :]

data_frame_value_matches_pattern.to_csv(output_file, index=False)

 三、篩選特定的列

3.1 列索引值

基礎python版

import csv
import sys

# Copy only the columns at the positions listed in my_columns, for every
# row including the header.
# Usage: script.py <input.csv> <output.csv>
input_file = sys.argv[1]
output_file = sys.argv[2]

# Zero-based positions of the columns to keep.
my_columns = [0, 3]

# Plain 'r' replaces the legacy 'rU' mode (removed in Python 3.11), and
# newline='' lets the csv module handle line endings itself.
with open(input_file, 'r', newline='') as csv_in_file:
    with open(output_file, 'w', newline='') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)

        for row_list in filereader:
            filewriter.writerow([row_list[index_value] for index_value in my_columns])

pandas版

import sys
import pandas as pd 

# Select columns by position with pandas and write them to a new CSV file.
# Usage: script.py <input.csv> <output.csv>
input_file, output_file = sys.argv[1], sys.argv[2]

data_frame = pd.read_csv(input_file)

# iloc is purely positional: ':' keeps every row, the list picks the
# columns at positions 0 and 3.
data_frame_column_by_index = data_frame.iloc[:, [0, 3]]

# index=False keeps the DataFrame's row index out of the output file.
data_frame_column_by_index.to_csv(output_file, index=False)

 3.2 列標題

基礎python版

pandas版

四、選定特定的行

相關文章
相關標籤/搜索
本站公眾號
   歡迎關注本站公眾號,獲取更多信息