# Python CSV file comparator

import os, csv, sys
import argparse

# Usage: compare.py -f1 file1.csv -f2 file2.csv -k key1,key2

def open_csv(path):
    """Open *path* for reading in the mode the ``csv`` module expects.

    Python 3's ``csv`` wants text mode with ``newline=''``; Python 2's wants
    binary mode. Choosing at run time keeps the script usable on both.
    """
    if sys.version_info[0] >= 3:
        return open(path, 'r', newline='')
    return open(path, 'rb')


def build_alignment_key(row, header_index, keys):
    """Return the alignment key of *row*.

    The key is the upper-cased value of every column named in *keys*, in the
    given order, joined with ``|``. Empty components are kept (``"A|"`` vs
    ``"|A"``) so that rows differing only in *which* key column is empty do
    not collapse onto the same key.

    Args:
        row: list of field values for one CSV record.
        header_index: mapping of column name -> position in *row*.
        keys: column names that make up the key.

    Raises:
        KeyError: a name in *keys* is not in *header_index*.
        IndexError: *row* is shorter than one of the key positions.
    """
    return "|".join(str(row[header_index[name]]).upper() for name in keys)


def iter_keyed_rows(path, keys):
    """Yield ``(alignment_key, row)`` for every data record in CSV file *path*.

    The first row is taken as the header and is used to locate the key
    columns for this file only, so the two files being compared may order
    their columns differently without corrupting the keys.

    Raises:
        ValueError: the file has no header row, the header lacks one of
            *keys*, or a record is too short to contain every key column.
    """
    with open_csv(path) as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        header = next(reader, None)
        if header is None:
            raise ValueError("%s is empty: no header row found" % path)

        # With duplicate column names the right-most occurrence wins.
        header_index = {name: pos for pos, name in enumerate(header)}
        missing = [name for name in keys if name not in header_index]
        if missing:
            raise ValueError("%s has no key column(s): %s"
                             % (path, ", ".join(missing)))

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no record.
                continue
            try:
                alignment_key = build_alignment_key(row, header_index, keys)
            except IndexError:
                raise ValueError("%s line %d: record has too few columns "
                                 "for the alignment key"
                                 % (path, reader.line_num))
            yield alignment_key, row


def compare_csv_files(path1, path2, keys):
    """Compare the records of two CSV files aligned on *keys*.

    File 1 is indexed by alignment key (a later duplicate key replaces an
    earlier one), then file 2 is streamed against that index. Records with
    the same key are compared positionally as lists of strings. Progress is
    printed while streaming: ``<key> matched`` for equal records, both rows
    for unequal ones, and the file 2 row when file 1 has no such key.

    Returns:
        A tuple ``(matched, mismatched, missed_in_file1, missed_in_file2)``:
        the count of equal records, the keys whose records differ, the keys
        present only in file 2, and the keys present only in file 1.

    Raises:
        ValueError: propagated from ``iter_keyed_rows`` for unusable input.
    """
    unmatched_file1 = {}
    for alignment_key, row in iter_keyed_rows(path1, keys):
        unmatched_file1[alignment_key] = row

    matched = 0
    mismatched = []
    missed_in_file1 = []
    for alignment_key, row2 in iter_keyed_rows(path2, keys):
        try:
            # pop() so whatever is left afterwards is exactly "missing in file 2".
            row1 = unmatched_file1.pop(alignment_key)
        except KeyError:
            print(row2)
            missed_in_file1.append(alignment_key)
            continue

        if row2 == row1:
            print("%s matched" % alignment_key)
            matched += 1
        else:
            print(row2)
            print(row1)
            mismatched.append(alignment_key)

    return matched, mismatched, missed_in_file1, list(unmatched_file1)


def main(argv=None):
    """Parse the command line, run the comparison and print the summary.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='compare two files to check if they contain same records')
    parser.add_argument('-f1', '--file1', required=True)
    parser.add_argument('-f2', '--file2', required=True)
    parser.add_argument('-k', '--key', required=True, help='used to get the alignment key')
    args = parser.parse_args(argv)

    keys = args.key.split(',')
    try:
        matched, mismatched, missed_in_file1, missed_in_file2 = \
            compare_csv_files(args.file1, args.file2, keys)
    except ValueError as exc:
        # Report unusable input as a usage error instead of a traceback.
        parser.error(str(exc))

    print("matched = %s" % (matched))
    print("Mismatched trades[%d]:\n%s" % (len(mismatched), mismatched))
    print("Missed in [%s] trades[%d]:\n%s" % (args.file1, len(missed_in_file1), missed_in_file1))
    print("Missed in [%s] trades[%d]:\n%s" % (args.file2, len(missed_in_file2), missed_in_file2))


if __name__ == '__main__':
    main()
# Related articles
# Related tags / search