11. 切片
slice()
record = '....................100          .......513.25     ..........'
cost = int(record[20:32]) * float(record[40:48])
更pythonic的方式是對切片命名
SHARES = slice(20,32)
PRICE = slice(40,48)
cost = int(record[SHARES]) * float(record[PRICE])
>>> a = slice(5, 50, 2)
>>> a.start
5
>>> a.stop
50
>>> a.step
2
可以用indices()將切片映射到特定大小的序列上,使a.stop等邊界不超出序列的實際大小
>>> s = 'HelloWorld'
>>> a.indices(len(s))
(5, 10, 2)
12. 找出序列中出現頻率最高的元素
collections.Counter()
>>> words = [
... 'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
... 'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
... 'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
... 'my', 'eyes', "you're", 'under'
... ]
>>> from collections import Counter
>>> word_counts = Counter(words)
>>> word_counts.most_common(3)
[('eyes', 8), ('the', 5), ('look', 4)]
>>> word_counts
Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, "you're": 1, "don't": 1, 'under': 1, 'not': 1})
如果需要再添加另一個序列的統計,可以用update()
>>> morewords = ['why','are','you','not','looking','in','my','eyes']
>>> word_counts.update(morewords)
>>> _  # 終端中'_'代表前一個輸出,在此是word_counts
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "you're": 1, "don't": 1, 'in': 1, 'you': 1, 'looking': 1, 'are': 1, 'under': 1, 'why': 1})
對Counter()可以直接進行+-操作
>>> a = Counter(words)
>>> b = Counter(morewords)
>>> a
Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, "you're": 1, "don't": 1, 'under': 1, 'not': 1})
>>> b
Counter({'eyes': 1, 'looking': 1, 'are': 1, 'in': 1, 'not': 1, 'you': 1, 'my': 1, 'why': 1})
>>> a+b
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "you're": 1, "don't": 1, 'in': 1, 'why': 1, 'looking': 1, 'are': 1, 'under': 1, 'you': 1})
>>> a-b
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, "you're": 1, "don't": 1, 'under': 1})
13. 對含有公共key的dict排序
operator.itemgetter()
>>> rows = [
... {'fname':'Brian', 'lname': 'Jones', 'uid': 1003},
... {'fname':'David', 'lname': 'Beazley', 'uid': 1002},
... {'fname':'John', 'lname': 'Cleese', 'uid': 1001},
... {'fname':'Big', 'lname': 'Jones', 'uid': 1004}
... ]
>>> from operator import itemgetter
>>> sorted(rows, key=itemgetter('uid'))
[{'lname': 'Cleese', 'uid': 1001, 'fname': 'John'}, {'lname': 'Beazley', 'uid': 1002, 'fname': 'David'}, {'lname': 'Jones', 'uid': 1003, 'fname': 'Brian'}, {'lname': 'Jones', 'uid': 1004, 'fname': 'Big'}]
#itemgetter也支持多個參數
>>> sorted(rows, key=itemgetter('lname','fname'))
[{'lname': 'Beazley', 'uid': 1002, 'fname': 'David'}, {'lname': 'Cleese', 'uid': 1001, 'fname': 'John'}, {'lname': 'Jones', 'uid': 1004, 'fname': 'Big'}, {'lname': 'Jones', 'uid': 1003, 'fname': 'Brian'}]
當然我們也可以採用常用的方法(lambda)實現
>>> sorted(rows, key=lambda r: r['fname'])
[{'lname': 'Jones', 'uid': 1004, 'fname': 'Big'}, {'lname': 'Jones', 'uid': 1003, 'fname': 'Brian'}, {'lname': 'Beazley', 'uid': 1002, 'fname': 'David'}, {'lname': 'Cleese', 'uid': 1001, 'fname': 'John'}]
>>> sorted(rows, key=lambda r: (r['lname'],r['fname']))
[{'lname': 'Beazley', 'uid': 1002, 'fname': 'David'}, {'lname': 'Cleese', 'uid': 1001, 'fname': 'John'}, {'lname': 'Jones', 'uid': 1004, 'fname': 'Big'}, {'lname': 'Jones', 'uid': 1003, 'fname': 'Brian'}]
但是itemgetter()方法通常運行會更快
14. 對象的排序
operator.attrgetter()
>>> class User(object):
...     def __init__(self,user_id):
...         self.user_id=user_id
...     def __repr__(self):
...         return 'User({})'.format(self.user_id)
...
>>> users = [User(23), User(3), User(99)]
>>> users
[User(23), User(3), User(99)]
常用方式
>>> sorted(users, key=lambda u: u.user_id)
[User(3), User(23), User(99)]
還有另一種選擇
>>> from operator import attrgetter
>>> sorted(users, key=attrgetter('user_id'))
[User(3), User(23), User(99)]
和itemgetter()類似,attrgetter()方法通常運行會更快
15基於某個域將記錄分組
itertools.groupby()
rows = [
    {'address':'5412 CLARK', 'date': '07/01/2012'},
    {'address':'5148 CLARK', 'date': '07/04/2012'},
    {'address':'5800 58TH', 'date': '07/02/2012'},
    {'address':'2122 CLARK', 'date': '07/03/2012'},
    {'address':'5645 RAVENSWOOD', 'date': '07/02/2012'},
    {'address':'1060 ADDISON', 'date': '07/02/2012'},
    {'address':'4801 BROADWAY', 'date': '07/01/2012'},
    {'address':'1039 GRANVILLE', 'date': '07/04/2012'},
]
>>> from itertools import groupby
>>> rows.sort(key=itemgetter('date'))
>>> for date, items in groupby(rows, key=itemgetter('date')):
...     print date
...     for i in items:
...         print i
...
07/01/2012
{'date': '07/01/2012', 'address': '5412 CLARK'}
{'date': '07/01/2012', 'address': '4801 BROADWAY'}
07/02/2012
{'date': '07/02/2012', 'address': '5800 58TH'}
{'date': '07/02/2012', 'address': '5645 RAVENSWOOD'}
{'date': '07/02/2012', 'address': '1060 ADDISON'}
07/03/2012
{'date': '07/03/2012', 'address': '2122 CLARK'}
07/04/2012
{'date': '07/04/2012', 'address': '5148 CLARK'}
{'date': '07/04/2012', 'address': '1039 GRANVILLE'}
因爲groupby()只會把連續出現且key相同的元素分爲一組,因此首先應該按該key排序
也可以採用1.6節的方法defaultdict()
from collections import defaultdict
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)