
1. Collections
1.1 Counter()
# --- Example 1: count how often each element occurs --------------------
# print(...) with a single argument behaves the same on Python 2 and 3.
from collections import Counter

li = ["Dog", "Cat", "Mouse", 42, "Dog", 42, "Cat", "Dog"]
a = Counter(li)
print(a)  # Counter({'Dog': 3, 42: 2, 'Cat': 2, 'Mouse': 1})

# --- Example 2: number of distinct elements -----------------------------
from collections import Counter

li = ["Dog", "Cat", "Mouse", 42, "Dog", 42, "Cat", "Dog"]
a = Counter(li)
print(a)  # Counter({'Dog': 3, 42: 2, 'Cat': 2, 'Mouse': 1})
print(len(set(li)))  # 4

# --- Example 3: counts, keys and the most common elements ---------------
from collections import Counter

li = ["Dog", "Cat", "Mouse", "Dog", "Cat", "Dog"]
a = Counter(li)
print(a)  # Counter({'Dog': 3, 'Cat': 2, 'Mouse': 1})
# list(...) makes the output a plain list on Python 3 as well, where
# values()/keys() return view objects.
print("{0} : {1}".format(list(a.values()), list(a.keys())))
# [1, 3, 2] : ['Mouse', 'Dog', 'Cat']   (key order may differ between Python versions)
print(a.most_common(3))  # [('Dog', 3), ('Cat', 2), ('Mouse', 1)]
# Count how often each word occurs in a block of text.
import re
from collections import Counter

string = """
Lorem ipsum dolor sit amet, consectetur
adipiscing elit. Nunc ut elit id mi ultricies
adipiscing. Nulla facilisi. Praesent pulvinar,
sapien vel feugiat vestibulum, nulla dui pretium orci,
non ultricies elit lacus quis ante. Lorem ipsum dolor
sit amet, consectetur adipiscing elit. Aliquam
pretium ullamcorper urna quis iaculis. Etiam ac massa
sed turpis tempor luctus. Curabitur sed nibh eu elit
mollis congue. Praesent ipsum diam, consectetur vitae
ornare a, aliquam a nunc. In id magna pellentesque
tellus posuere adipiscing. Sed non mi metus, at lacinia
augue. Sed magna nisi, ornare in mollis in, mollis
sed nunc. Etiam at justo in leo congue mollis.
Nullam in neque eget metus hendrerit scelerisque
eu non enim. Ut malesuada lacus eu nulla bibendum
id euismod urna sodales.
"""

# \w+ matches whole words; the previous pattern r'\w ' only matched a single
# word character followed by a space.
words = re.findall(r'\w+', string)  # find every word in the document
lower_words = [word.lower() for word in words]  # normalise case before counting
word_counts = Counter(lower_words)  # number of occurrences of each word
print(word_counts)
# Counter({'elit': 5, 'sed': 5, 'in': 5, 'adipiscing': 4, 'mollis': 4, 'eu': 3,
# 'id': 3, 'nunc': 3, 'consectetur': 3, 'non': 3, 'ipsum': 3, 'nulla': 3, 'pretium':
# 2, 'lacus': 2, 'ornare': 2, 'at': 2, 'praesent': 2, 'quis': 2, 'sit': 2, 'congue': 2, 'amet': 2,
# 'etiam': 2, 'urna': 2, 'a': 2, 'magna': 2, 'lorem': 2, 'aliquam': 2, 'ut': 2, 'ultricies': 2, 'mi': 2,
# 'dolor': 2, 'metus': 2, 'ac': 1, 'bibendum': 1, 'posuere': 1, 'enim': 1, 'ante': 1, 'sodales': 1, 'tellus': 1,
# 'vitae': 1, 'dui': 1, 'diam': 1, 'pellentesque': 1, 'massa': 1, 'vel': 1, 'nullam': 1, 'feugiat': 1, 'luctus': 1,
# 'pulvinar': 1, 'iaculis': 1, 'hendrerit': 1, 'orci': 1, 'turpis': 1, 'nibh': 1, 'scelerisque': 1, 'ullamcorper': 1,
# 'eget': 1, 'neque': 1, 'euismod': 1, 'curabitur': 1, 'leo': 1, 'sapien': 1, 'facilisi': 1, 'vestibulum': 1, 'nisi': 1,
# 'justo': 1, 'augue': 1, 'tempor': 1, 'lacinia': 1, 'malesuada': 1})
1.2 Deque
Deque是一種由隊列結構擴展而來的雙端隊列(double-ended queue),隊列元素可以在隊列兩端添加或刪除。所以它還被稱爲頭尾鏈接列表(head-tail linked list),儘管叫這個名字的還有另外一個特殊的數據結構實現。
Deque支持線程安全、經過優化的append和pop操作,在隊列兩端的相關操作都可以達到近乎O(1)的時間複雜度。雖然list也支持類似的操作,但是它主要是對定長列表的操作表現很不錯,而當遇到pop(0)和insert(0, v)這樣既改變了列表的長度又改變其元素位置的操作時,其複雜度就變爲O(n)了。
# Benchmark: the same four end-of-sequence operations on deque and on list.
import time
from collections import deque

num = 100000


def append(c, count=num):
    """Append ``count`` integers to the right end of container ``c``."""
    for i in range(count):
        c.append(i)


def appendleft(c, count=num):
    """Insert ``count`` integers at the left end of ``c``.

    A deque has a dedicated ``appendleft``; for any other container the
    equivalent ``insert(0, i)`` is used.
    """
    if isinstance(c, deque):
        for i in range(count):
            c.appendleft(i)
    else:
        for i in range(count):
            c.insert(0, i)


def pop(c, count=num):
    """Remove ``count`` elements from the right end of ``c``."""
    for i in range(count):
        c.pop()


def popleft(c, count=num):
    """Remove ``count`` elements from the left end of ``c``.

    A deque has a dedicated ``popleft``; for any other container the
    equivalent ``pop(0)`` is used.
    """
    if isinstance(c, deque):
        for i in range(count):
            c.popleft()
    else:
        for i in range(count):
            c.pop(0)


if __name__ == '__main__':
    for container in [deque, list]:
        for operation in [append, appendleft, pop, popleft]:
            # Start from ``num`` elements so the pop variants never run dry.
            c = container(range(num))
            start = time.time()
            operation(c)
            elapsed = time.time() - start
            # A coarse clock can report zero elapsed time for the fast cases.
            rate = num / elapsed if elapsed else float('inf')
            print("Completed {0}/{1} in {2} seconds: {3} ops/sec".format(
                container.__name__, operation.__name__, elapsed, rate))
# Completed deque/append in 0.0250000953674 seconds: 3999984.74127 ops/sec
# Completed deque/appendleft in 0.0199999809265 seconds: 5000004.76838 ops/sec
# Completed deque/pop in 0.0209999084473 seconds: 4761925.52225 ops/sec
# Completed deque/popleft in 0.0199999809265 seconds: 5000004.76838 ops/sec
# Completed list/append in 0.0220000743866 seconds: 4545439.17637 ops/sec
# Completed list/appendleft in 21.3209998608 seconds: 4690.21155917 ops/sec
# Completed list/pop in 0.0240001678467 seconds: 4166637.52682 ops/sec
# Completed list/popleft in 4.01799988747 seconds: 24888.0046791 ops/sec
# Basic deque operations: append/pop at both ends and rotation.
from collections import deque

q = deque(range(5))
# These two calls are required for the output listed below:
# 5 goes on the right end, 6 on the left end.
q.append(5)
q.appendleft(6)
print(q)
print(q.pop())
print(q.popleft())
# rotate() works in place and returns None, which is what gets printed.
print(q.rotate(3))
print(q)
print(q.rotate(-1))
print(q)
# deque([6, 0, 1, 2, 3, 4, 5])
# 5
# 6
# None
# deque([2, 3, 4, 0, 1])
# None
# deque([3, 4, 0, 1, 2])
譯者注:rotate是隊列的旋轉操作,Right rotate(正參數)是將右端的元素移動到左端,而Left rotate(負參數)則相反。
1.3 Defaultdict
# Record every position at which each word occurs, using list values.
from collections import defaultdict

s = "the quick brown fox jumps over the lazy dog"
words = s.split()
location = defaultdict(list)
for m, n in enumerate(words):
    # m is the word index, n the word; a missing key starts as an empty list.
    location[n].append(m)
print(location)
# defaultdict(<type 'list'>, {'brown': [2], 'lazy': [7], 'over': [5], 'fox': [3],
# 'dog': [8], 'quick': [1], 'the': [0, 6], 'jumps': [4]})
# Same word-position index, but with set values instead of lists.
from collections import defaultdict

s = "the quick brown fox jumps over the lazy dog"
words = s.split()
location = defaultdict(set)
for m, n in enumerate(words):
    # m is the word index, n the word; a missing key starts as an empty set.
    location[n].add(m)
print(location)
# defaultdict(<type 'set'>, {'brown': set([2]), 'lazy': set([7]),
# 'over': set([5]), 'fox': set([3]), 'dog': set([8]), 'quick': set([1]),
# 'the': set([0, 6]), 'jumps': set([4])})
# The same grouping idea with a plain dict and dict.setdefault().
s = "the quick brown fox jumps over the lazy dog"
d = {}
words = s.split()
for key, value in enumerate(words):
    # setdefault returns the existing list for key, or stores and returns [].
    d.setdefault(key, []).append(value)
print(d)
# {0: ['the'], 1: ['quick'], 2: ['brown'], 3: ['fox'], 4: ['jumps'], 5: ['over'], 6: ['the'], 7: ['lazy'], 8: ['dog']}
class Example(dict):
    """A dict whose missing keys are created on first access as nested Examples."""

    def __getitem__(self, item):
        """Return ``self[item]``, creating an empty nested instance if absent."""
        try:
            return dict.__getitem__(self, item)
        except KeyError:
            # type(self)() keeps nested levels of the same (sub)class.
            value = self[item] = type(self)()
            return value


a = Example()
a[1][2][3] = 4
a[1][3][3] = 5
a[1][2]['test'] = 6
print(a)  # {1: {2: {'test': 6, 3: 4}, 3: {3: 5}}}
2. Array
在使用array進行計算的時候,需要特別注意那些創建list的操作。例如,使用列表推導式(list comprehension)的時候,會將array整個轉換爲list,使得存儲空間膨脹。一個可行的替代方案是使用生成器表達式創建新的array。看代碼:
import array

# Build a doubled copy of the array. The generator expression hands values
# to the new array one at a time, so no intermediate list is created.
a = array.array("i", [1, 2, 3, 4, 5])
b = array.array(a.typecode, (value * 2 for value in a))
import array

# Double every element in place instead of building a second array.
a = array.array("i", [1, 2, 3, 4, 5])
for i, x in enumerate(a):
    a[i] = 2 * x
# Time the two ways of doubling an array shown above.
import array
from timeit import Timer


def arraytest():
    """Build a doubled copy of a small array from a generator expression.

    Returns the new array so the result can be inspected.
    """
    a = array.array("i", [1, 2, 3, 4, 5])
    b = array.array(a.typecode, (2 * x for x in a))
    return b


def enumeratetest():
    """Double a small array in place by index assignment.

    Returns the modified array so the result can be inspected.
    """
    a = array.array("i", [1, 2, 3, 4, 5])
    for i, x in enumerate(a):
        a[i] = 2 * x
    return a


if __name__ == '__main__':
    # The setup string imports each function into timeit's own namespace.
    m = Timer("arraytest()", "from __main__ import arraytest")
    n = Timer("enumeratetest()", "from __main__ import enumeratetest")
    print(m.timeit())  # 5.22479210582
    print(n.timeit())  # 4.34367196717
3. Heapq
堆是一種樹形的數據結構,樹上的子節點與父節點之間存在順序關係。二叉堆(binary heap)可以用一個經過組織的列表或數組結構來表示,在這種結構中,元素N的子節點的序號爲2*N+1和2*N+2(下標始於0)。簡單來講,這個模塊中的所有函數都假設序列是有序的,所以序列中的第一個元素(seq[0])是最小的,序列的其餘部分構成一個二叉樹,並且seq[i]節點的子節點分別爲seq[2*i+1]以及seq[2*i+2]。當對序列進行修改時,相關函數總是確保子節點大於等於父節點。
import heapq

# Push values in arbitrary order, then pop them back smallest-first.
heap = []
for value in [20, 10, 30, 50, 40]:
    heapq.heappush(heap, value)
while heap:
    print(heapq.heappop(heap))
import heapq

# Pick the three largest and three smallest values without sorting the list.
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
top_three = heapq.nlargest(3, nums)
bottom_three = heapq.nsmallest(3, nums)
print(top_three)  # Prints [42, 37, 23]
print(bottom_three)  # Prints [-4, 1, 2]
import heapq

# nsmallest/nlargest on records: the key function selects the compared field.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
print(cheap)
# [{'price': 16.35, 'name': 'YHOO', 'shares': 45},
# {'price': 21.09, 'name': 'FB', 'shares': 200}, {'price': 31.75, 'name': 'HPQ', 'shares': 35}]
print(expensive)
# [{'price': 543.22, 'name': 'AAPL', 'shares': 50}, {'price': 115.65, 'name': 'ACME',
# 'shares': 75}, {'price': 91.1, 'name': 'IBM', 'shares': 100}]
import heapq


class Item:
    """A named payload used to demonstrate the priority queue."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'Item({!r})'.format(self.name)


class PriorityQueue:
    """Priority queue that pops the highest priority first, FIFO among equals."""

    def __init__(self):
        self._queue = []
        self._index = 0

    def push(self, item, priority):
        """Add ``item`` with the given ``priority`` (larger pops earlier)."""
        # heapq is a min-heap, so the priority is negated. The running index
        # breaks ties in insertion order, so items are never compared directly.
        heapq.heappush(self._queue, (-priority, self._index, item))
        # Must increase on every push; a constant index would leave
        # equal-priority entries tied and fall through to comparing items.
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item."""
        return heapq.heappop(self._queue)[-1]


q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)
print(q.pop())  # Item('bar')
print(q.pop())  # Item('spam')
print(q.pop())  # Item('foo')
print(q.pop())  # Item('grok')
4. Bisect
# A list of (start, end) tuples kept in ascending order.
a = [(0, 100), (150, 220), (500, 1000)]
如果我想添加一個range (250, 400),我可能會這麼做:
import bisect

# insort_right inserts the new tuple at the position that keeps the list sorted.
a = [(0, 100), (150, 220), (500, 1000)]
bisect.insort_right(a, (250, 400))
print(a)  # [(0, 100), (150, 220), (250, 400), (500, 1000)]
import bisect

a = [(0, 100), (150, 220), (500, 1000)]
bisect.insort_right(a, (250, 400))
bisect.insort_right(a, (399, 450))
print(a)  # [(0, 100), (150, 220), (250, 400), (399, 450), (500, 1000)]
# bisect() only reports where the item would go; the list is left unchanged.
print(bisect.bisect(a, (550, 1200)))  # 5
bisect(sequence, item) => index 返回元素應該插入的位置,但序列本身並不會被修改。
import bisect

a = [(0, 100), (150, 220), (500, 1000)]
bisect.insort_right(a, (250, 400))
bisect.insort_right(a, (399, 450))
print(a)  # [(0, 100), (150, 220), (250, 400), (399, 450), (500, 1000)]
# bisect() reports the insertion index without modifying the list ...
print(bisect.bisect(a, (550, 1200)))  # 5
# ... while insort_right() performs the insertion at that index.
bisect.insort_right(a, (550, 1200))
print(a)  # [(0, 100), (150, 220), (250, 400), (399, 450), (500, 1000), (550, 1200)]
5. Weakref
weakref模塊可以幫助我們創建Python引用,卻不會阻止對象的銷燬操作。這一節包含了weak reference的基本用法,並且引入一個代理類。
在開始之前,我們需要明白什麼是strong reference。strong reference是一個對對象的引用次數、生命週期以及銷燬時機產生影響的指針。strong reference如你所見,就是當你將一個對象賦值給一個變量的時候產生的:
>>> a = [1,2,3]
>>> b = a
在這種狀況下,這個列表有兩個strong reference,分別是a和b。在這兩個引用都被釋放以前,這個list不會被銷燬。
class Foo(object):
    """Small class that announces its creation and destruction."""

    def __init__(self):
        self.obj = None
        print('created')

    def __del__(self):
        print('destroyed')

    def show(self):
        """Print the currently stored object."""
        print(self.obj)

    def store(self, obj):
        """Remember ``obj`` on this instance."""
        self.obj = obj


a = Foo()  # created
b = a
del a
del b  # destroyed
Weak reference則不會對對象的引用計數器產生影響。當一個對象存在weak reference時,並不會影響對象的銷燬。這就是說,如果一個對象僅剩下weak reference,那麼它將會被銷燬。
你可以使用weakref.ref函數來創建對象的weak reference。這個函數調用需要將一個strong reference作爲第一個參數傳給函數,並且返回一個weak reference。
>>> import weakref
>>> a = Foo()
>>> b = weakref.ref(a)
>>> b
一個臨時的strong reference可以從weak reference中創建,即是下例中的b():
>>> a == b()
>>> b().show()
請注意當我們刪除strong reference的時候,對象將立即被銷燬。
>>> del a
如果試圖在對象被銷燬之後通過weak reference使用對象,則會返回None:
>>> b() is None
若是使用weakref.proxy,就能提供相對於weakref.ref更透明的可選操作。同樣是使用一個strong reference作爲第一個參數並且返回一個weak reference,proxy更像是一個strong reference,但當對象不存在時會拋出異常。