我们知道,Python中有很多模块,有系统自带的模块,还有些需要从网上下载模块,Python之所以越来越受欢迎,主要是由于Python的类库很多,丰富多样,有很多人开发和维护。下面我们来学习了解collections模块,这也是我第一个系统了解的模块,希望多练习,多看掌握模块的知识。
一、Counter(dict)类
class Counter(dict): '''Dict subclass for counting hashable items. Sometimes called a bag or multiset. Elements are stored as dictionary keys and their counts are stored as dictionary values. >>> c = Counter('abcdeabcdabcaba') # count elements from a string >>> c.most_common(3) # three most common elements [('a', 5), ('b', 4), ('c', 3)] >>> sorted(c) # list all unique elements ['a', 'b', 'c', 'd', 'e'] >>> ''.join(sorted(c.elements())) # list elements with repetitions 'aaaaabbbbcccdde' >>> sum(c.values()) # total of all counts 15 >>> c['a'] # count of letter 'a' 5 >>> for elem in 'shazam': # update counts from an iterable ... c[elem] += 1 # by adding 1 to each element's count >>> c['a'] # now there are seven 'a' 7 >>> del c['b'] # remove all 'b' >>> c['b'] # now there are zero 'b' 0 >>> d = Counter('simsalabim') # make another counter >>> c.update(d) # add in the second counter >>> c['a'] # now there are nine 'a' 9 >>> c.clear() # empty the counter >>> c Counter() Note: If a count is set to zero or reduced to zero, it will remain in the counter until the entry is deleted or the counter is cleared: >>> c = Counter('aaabbc') >>> c['b'] -= 2 # reduce the count of 'b' by two >>> c.most_common() # 'b' is still in, but its count is zero [('a', 3), ('c', 1), ('b', 0)] ''' # References: # http://en.wikipedia.org/wiki/Multiset # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm # http://code.activestate.com/recipes/259174/ # Knuth, TAOCP Vol. II section 4.6.3 从上面可以看出,模块中包含Counter(dict)类,而且Counter(dict)继承父类dict类,因此Counter拥有父类的功能,下面我们来看一下Counter 类中都包含了那些方法: >>> c = Counter('abcdeabcdabcaba') # count elements from a string 首先,我们定义一个计数器c,用于存储Counter()中的结果,看结果是什么: >>> c Counter({'a': 5, 'b': 4, 'c': 3, 'd': 2, 'e': 1}) 从结果可以看出,生成了一个Counter({})类似于字典的形式,这是对字典的加工,具有字典的性质,我们可以使用c.keys()、c.values()来遍历 这个Counter中的键和值。
>>> c.most_common(3) # three most common elements [('a', 5), ('b', 4), ('c', 3)] >>> type(c.most_common(3))取列表中前三个元素最多的键值对,生成一个列表,返回前三个元素最多的列表。 >>> sorted(c) # list all unique elements ['a', 'b', 'c', 'd', 'e'] 对对象c中的元素进行排序,并且生成一个列表,排序,对对象中的fauns排序 >>> ''.join(sorted(c.elements())) # list elements with repetitions 'aaaaabbbbcccdde' >>> sorted(c.elements()) ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'e'] 对对象c中的所有元素进行排序,使用字符串的join(sorted(c.elements()))功能。 >>> sum(c.values()) # total of all counts 15 遍历Counter中的值,并且求和。 >>> c['a'] # count of letter 'a' 统计letter中"a"的个数 5 >>> for elem in 'shazam': # update counts from an iterable ... c[elem] += 1 # by adding 1 to each element's count >>> c['a'] # now there are seven 'a' 7 >>> print(c) Counter({'a': 7, 'b': 4, 'c': 3, 'd': 2, 'z': 1, 'h': 1, 'e': 1, 's': 1, 'm': 1}) 遍历"shazam"中的每个元素,向c中Counter()添加元素,如果没有这个元素,就增加,如有有,就个数加一,直到元素循环完毕。
>>> del c['b'] # remove all 'b' >>> c['b'] # now there are zero 'b' 0 >>> c Counter({'a': 7, 'c': 3, 'd': 2, 'z': 1, 'h': 1, 'e': 1, 's': 1, 'm': 1}) 删除Counter()中的元素b,删除的是元素的值,Counter()是一个计数器的功能,统计对象中的元素用的,只需要统计对象中的元素,因此即便 Counter()中没有这个值,也不会报错,只会返回0,代表没有,这就是计数器的功能,只是用来计数,包含字典的功能。但是本质上还是用来进行 计数的,能够使用数学的算法。 >>> c = Counter('aaabbc') >>> c['b'] -= 2 # reduce the count of 'b' by two >>> c.most_common() # 'b' is still in, but its count is zero [('a', 3), ('c', 1), ('b', 0)] >>> c = Counter("aaabbc") >>> c Counter({'a': 3, 'b': 2, 'c': 1}) >>> c["b"] -= 2 >>> c.most_common() [('a', 3), ('c', 1), ('b', 0)] >>> c = Counter("aaabbc") >>> c["b"] -= 3 >>> c Counter({'a': 3, 'c': 1, 'b': -1}) >>> c.most_common(3) [('a', 3), ('c', 1), ('b', -1)] Counter()计数器功能,统计对象中的个数,生成一个Counter()字典。 1.most_common(self,n=None) def most_common(self, n=None): '''List the n most common elements and their counts from the most common to the least. If n is None, then list all element counts. >>> Counter('abcdeabcdabcaba').most_common(3) [('a', 5), ('b', 4), ('c', 3)] ''' # Emulate Bag.sortedByCount from Smalltalk if n is None: return sorted(self.items(), key=_itemgetter(1), reverse=True) return _heapq.nlargest(n, self.items(), key=_itemgetter(1)) 返回Counter中数量最多的前三个元素,并将结果封装到一个列表中,列表中的每个元素都是一个元组。 2.elements(self) def elements(self): '''Iterator over elements repeating each as many times as its count. >>> c = Counter('ABCABC') >>> sorted(c.elements()) ['A', 'A', 'B', 'B', 'C', 'C'] # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) >>> product = 1 >>> for factor in prime_factors.elements(): # loop over factors ... product *= factor # and multiply them >>> product 1836 Note, if an element's count has been set to zero or is a negative number, elements() will ignore it. ''' # Emulate Bag.do from Smalltalk and Multiset.begin from C++. 
return _chain.from_iterable(_starmap(_repeat, self.items())) elements(self)是返回对象中的每个元素,下面来验证一下: >>> c = Counter("abacdefa") >>> for item in c.elements(): ... print(item) ... b f d e a a a c >>> c.elements()>>> list(c.elements()) ['b', 'f', 'd', 'e', 'a', 'a', 'a', 'c'] >>> sorted(c.elements()) ['a', 'a', 'a', 'b', 'c', 'd', 'e', 'f'] 我们定义一个Counter计数器,并且使用elements()遍历其中的每个元素,并且打印其中的消息,最后使用list()生成一个列表。使用sorted() 函数进行排序。 Conter()类具有dict(字典)中的方法,实例如下: >>> c = Counter({'a': 3, 'b': 1, 'f': 1, 'd': 1, 'e': 1, 'c': 1}) >>> for k,v in c.items(): ... print(k,v) ... b 1 f 1 d 1 e 1 a 3 c 1 3.update(*args,**kwargs) def update(*args, **kwds): '''Like dict.update() but add counts instead of replacing them. Source can be an iterable, a dictionary, or another Counter instance. >>> c = Counter('which') >>> c.update('witch') # add elements from another iterable >>> d = Counter('watch') >>> c.update(d) # add elements from another counter >>> c['h'] # four 'h' in which, witch, and watch 4 ''' # The regular dict.update() operation makes no sense here because the # replace behavior results in the some of original untouched counts # being mixed-in with all of the other counts for a mismash that # doesn't have a straight-forward interpretation in most counting # contexts. Instead, we implement straight-addition. Both the inputs # and outputs are allowed to contain zero and negative counts. update(*args,**kwargs)是向Counter中添加元素,如果里面包含这个元素就加一,包含两个就加二,没有就新加入,就是更新计数器,统计里面 包含的元素的个数。实例如下: >>> c = Counter("which") >>> c.update("witch") >>> c Counter({'h': 3, 'w': 2, 'c': 2, 'i': 2, 't': 1}) >>> d = Counter("watch") >>> c.update(d) >>> c Counter({'h': 4, 'w': 3, 'c': 3, 't': 2, 'i': 2, 'a': 1}) >>> c["d"] 0 >>> c["h"] 4 可以向添加任意对象,["ww"]会被当成一个元素传递给计数器。 4.def subtract(*args,**kwargs) def subtract(*args, **kwds): '''Like dict.update() but subtracts counts instead of replacing them. Counts can be reduced below zero. Both the inputs and outputs are allowed to contain zero and negative counts. 
Source can be an iterable, a dictionary, or another Counter instance. >>> c = Counter('which') >>> c.subtract('witch') # subtract elements from another iterable >>> c.subtract(Counter('watch')) # subtract elements from another counter >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch 0 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch -1 ''' subtract(*args,**kwargs)与update(*args,**kwargs)正好相反,一个是加,一个是减,减掉里面元素的个数,计数器。 >>> c = Counter("which") >>> c.subtract("witch") >>> c Counter({'h': 1, 'w': 0, 'c': 0, 'i': 0, 't': -1}) >>> c.subtract(Counter("watch")) >>> c Counter({'h': 0, 'i': 0, 'w': -1, 'a': -1, 'c': -1, 't': -2}) >>> c["w"] -1 >>> c["h"] 0 5.copy(self) def copy(self): 'Return a shallow copy.' return self.__class__(self) 复制Counter计数器。 >>> c = Counter("which") >>> d = c >>> d Counter({'h': 2, 'w': 1, 'c': 1, 'i': 1}) >>> c Counter({'h': 2, 'w': 1, 'c': 1, 'i': 1}) 6.__reduce__(self) def __reduce__(self): return self.__class__, (dict(self),) 7.__delitem__(self,elem) def __delitem__(self, elem): 'Like dict.__delitem__() but does not raise KeyError for missing values.' 
if elem in self: super().__delitem__(elem) __delitem__(self,elem)删除Counter类中的指定的元素,示例如下: >>> c = Counter("abcadcedfad") >>> c.__delitem__("a") >>> c Counter({'d': 3, 'c': 2, 'b': 1, 'f': 1, 'e': 1}) 8.__repr__(self) def __repr__(self): if not self: return '%s()' % self.__class__.__name__ try: items = ', '.join(map('%r: %r'.__mod__, self.most_common())) return '%s({%s})' % (self.__class__.__name__, items) except TypeError: # handle case where values are not orderable return '{0}({1!r})'.format(self.__class__.__name__, dict(self)) >>> c = Counter("abdcadsdfs") >>> c Counter({'d': 3, 's': 2, 'a': 2, 'b': 1, 'f': 1, 'c': 1}) >>> c.__repr__() "Counter({'d': 3, 's': 2, 'a': 2, 'b': 1, 'f': 1, 'c': 1})" >>> type(c.__repr__()) __repr__(self)不需要参数,但是返回的类型是一个字符串类型,生成一个类似于Counter()字典的字符串。 9.__iand__(self,other): def __iand__(self, other): '''Inplace intersection is the minimum of corresponding counts. >>> c = Counter('abbb') >>> c &= Counter('bcc') >>> c Counter({'b': 1}) ''' for elem, count in self.items(): other_count = other[elem] if other_count < count: self[elem] = other_count return self._keep_positive() 10.__ior__(self,other) def __ior__(self, other): '''Inplace union is the maximum of value from either counter. >>> c = Counter('abbb') >>> c |= Counter('bcc') >>> c Counter({'b': 3, 'c': 2, 'a': 1}) ''' for elem, other_count in other.items(): count = self[elem] if other_count > count: self[elem] = other_count return self._keep_positive() 11.__isub__(self,other) def __isub__(self, other): '''Inplace subtract counter, but keep only results with positive counts. >>> c = Counter('abbbc') >>> c -= Counter('bccd') >>> c Counter({'b': 2, 'a': 1}) ''' for elem, count in other.items(): self[elem] -= count return self._keep_positive() __isub__(other)等价于self.subtract(other),类似于c -= Counter(other) 12.__iadd__(self,other) def __iadd__(self, other): '''Inplace add from another counter, keeping only positive counts. 
>>> c = Counter('abbb') >>> c += Counter('bcc') >>> c Counter({'b': 4, 'c': 2, 'a': 1}) ''' for elem, count in other.items(): self[elem] += count return self._keep_positive() 实例如下: >>> c = Counter("abbb") >>> c += Counter("bcc") >>> c Counter({'b': 4, 'c': 2, 'a': 1}) >>> c = Counter("abbb") >>> c.update("bcc") >>> c Counter({'b': 4, 'c': 2, 'a': 1}) >>> c = Counter("abbb") >>> c.__iadd__(Counter("bcc")) Counter({'b': 4, 'c': 2, 'a': 1}) 两个同类表的对象进行相加,属性要相同。 13._keep_positive(slef) def _keep_positive(self): '''Internal method to strip elements with a negative or zero count''' nonpositive = [elem for elem, count in self.items() if not count > 0] for elem in nonpositive: del self[elem] return self _keep_positive(self)清除计数器中元素个数小于零的元素。nonpositive = [elem for elem,count in self.items() if not count >0] >>> c = Counter({'b': 3, 'a': 1}) >>> c._keep_positive() Counter({'b': 3, 'a': 1}) >>> c.subtract("aa") >>> c = Counter({'b': 3, 'a': -1}) >>> c._keep_positive() Counter({'b': 3}) 14.__neg__(self) def __neg__(self): '''Subtracts from an empty counter. Strips positive and zero counts, and flips the sign on negative counts. ''' result = Counter() for elem, count in self.items(): if count < 0: result[elem] = 0 - count return result >>> c = Counter("abcada") >>> c.subtract("aaaaddd") >>> c Counter({'b': 1, 'c': 1, 'a': -1, 'd': -2}) >>> c.__neg__() Counter({'d': 2, 'a': 1}) 15.__pos__(self) def __pos__(self): 'Adds an empty counter, effectively stripping negative and zero counts' result = Counter() for elem, count in self.items(): if count > 0: result[elem] = count return result __pos__(self)是计数器中元素个数大于1的元素,元素个数大于1处于positive状态,如下所示: >>> c = Counter("abcada") >>> c.subtract("aaaaddd") >>> c Counter({'b': 1, 'c': 1, 'a': -1, 'd': -2}) >>> c.__pos__() Counter({'b': 1, 'c': 1}) 16.__add__(self,other) def __add__(self, other): '''Add counts from two counters. 
>>> Counter('abbb') + Counter('bcc') Counter({'b': 4, 'c': 2, 'a': 1}) ''' if not isinstance(other, Counter): return NotImplemented result = Counter() for elem, count in self.items(): newcount = count + other[elem] if newcount > 0: result[elem] = newcount for elem, count in other.items(): if elem not in self and count > 0: result[elem] = count return result 17.__sub__(self,other) def __sub__(self, other): ''' Subtract count, but keep only results with positive counts. >>> Counter('abbbc') - Counter('bccd') Counter({'b': 2, 'a': 1}) ''' if not isinstance(other, Counter): return NotImplemented result = Counter() for elem, count in self.items(): newcount = count - other[elem] if newcount > 0: result[elem] = newcount for elem, count in other.items(): if elem not in self and count < 0: result[elem] = 0 - count return result __sub__(slef,other)两个Counter类相减,Counter(self)-Counter(other). 18.__or__(self,other) def __or__(self, other): '''Union is the maximum of value in either of the input counters. >>> Counter('abbb') | Counter('bcc') Counter({'b': 3, 'c': 2, 'a': 1}) ''' if not isinstance(other, Counter): return NotImplemented result = Counter() for elem, count in self.items(): other_count = other[elem] newcount = other_count if count < other_count else count if newcount > 0: result[elem] = newcount for elem, count in other.items(): if elem not in self and count > 0: result[elem] = count return result __or__(self,other)异或等价于Counter(self) | self(other). 19.__and__(self,other) def __and__(self, other): ''' Intersection is the minimum of corresponding counts. 异与,必须两者同时满足才为真,二进制下 >>> Counter('abbb') & Counter('bcc') Counter({'b': 1}) ''' if not isinstance(other, Counter): return NotImplemented result = Counter() for elem, count in self.items(): other_count = other[elem] newcount = count if count < other_count else other_count if newcount > 0: result[elem] = newcount return result __and__(self,other)等价于Counter(self) & Counter(other).
二、OrderedDict(dict)有序字典
class OrderedDict(dict): 'Dictionary that remembers insertion order' # An inherited dict maps keys to values. # The inherited dict provides __getitem__, __len__, __contains__, and get. # The remaining methods are order-aware. # Big-O running times for all methods are the same as regular dictionaries. # The internal self.__map dict maps keys to links in a doubly linked list. # The circular doubly linked list starts and ends with a sentinel element. # The sentinel element never gets deleted (this simplifies the algorithm). # The sentinel is in self.__hardroot with a weakref proxy in self.__root. # The prev links are weakref proxies (to prevent circular references). # Individual links are kept alive by the hard reference in self.__map. # Those hard references disappear when a key is deleted from an OrderedDict.
我们知道正常我们使用的字典是无序的,而collections模块中的OrderedDict(dict)类能够实现有序字典的功能,当然也是继承了字典的父类, 具有字典的功能。上面介绍中,有序字典是给字典加上了顺序(order),下面来看看如何实现有序字典的功能: 字典是无序的,但是我们知道列表是有序的,OrderedDict(dict)就是整合了字典与列表的功能。 字典 {"k1":"v1","k2":"v2","k10":"v10"} 列表 [ k1, k2, k10 ] 有序字典内部实现的代码: dic = {"k1":"v1","k2":"v2","k10":"v10"} lis = ["k1","k2","k10"] for k in lis: print(dic[k]) 这样我们通过遍历列表中的值,同时列表中的值属于字典中的键,由于列表是有序的,因此打印字典的时候也是按着列表中元素的顺序进行打印 的。 创建有序字典的两种方法: (1)、import collections dic = collections.OrderedDict() (2)、from collections import OrderedDict dic = OrderedDict() 下面我们来看一个实例,有序字典和字典: from collections import OrderedDict dic = OrderedDict() dic["k1"] = "v1" dic["k2"] = "v2" dic["k3"] = "v3" print(dic) 结果如下: OrderedDict([('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3')]) from collections import OrderedDict dic = dict() #dic = OrderedDict() dic["k1"] = "v1" dic["k2"] = "v2" dic["k3"] = "v3" print(dic) 运行结果如下: {'k2': 'v2', 'k1': 'v1', 'k3': 'v3'} 可以看出,OrderedDict(dict)类生成的字典是有序的。所以我们在想让字典有序的时候记得调用collections模块中的OrderedDict()类功能。 1.__init__(*args,**kwargs) def __init__(*args, **kwds): '''Initialize an ordered dictionary. The signature is the same as regular dictionaries, but keyword arguments are not recommended because their insertion order is arbitrary. ''' if not args: raise TypeError("descriptor '__init__' of 'OrderedDict' object " "needs an argument") self, *args = args if len(args) > 1: raise TypeError('expected at most 1 arguments, got %d' % len(args)) try: self.__root except AttributeError: self.__hardroot = _Link() self.__root = root = _proxy(self.__hardroot) root.prev = root.next = root self.__map = {} self.__update(*args, **kwds) 2.__setitem__(self,key,value) def __setitem__(self, key, value, dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link): 'od.__setitem__(i, y) <==> od[i]=y' # Setting a new item creates a new link at the end of the linked list, # and the inherited dictionary is updated with the new key/value pair. 
if key not in self: self.__map[key] = link = Link() root = self.__root last = root.prev link.prev, link.next, link.key = last, root, key last.next = link root.prev = proxy(link) dict_setitem(self, key, value) __setitem__(self,key,value)向有序字典中添加元素,要指明键和值,实例如下: >>> import collections >>> dic = collections.OrderedDict() >>> dic.__setitem__("k1","v1") >>> dic["k2"] = "v2" >>> dic OrderedDict([('k1', 'v1'), ('k2', 'v2')]) 3.__delitem__(self,key,dict_delitem=dict.__detitem__) def __delitem__(self, key, dict_delitem=dict.__delitem__): 'od.__delitem__(y) <==> del od[y]' # Deleting an existing item uses self.__map to find the link which gets # removed by updating the links in the predecessor and successor nodes. dict_delitem(self, key) link = self.__map.pop(key) link_prev = link.prev link_next = link.next link_prev.next = link_next link_next.prev = link_prev link.prev = None link.next = None 4.pop(self,key,default=_marker) def pop(self, key, default=__marker): '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. If key is not found, d is returned if given, otherwise KeyError is raised. 
''' if key in self: result = self[key] del self[key] return result if default is self.__marker: raise KeyError(key) return default pop(self,key,default=_marker)是指定删除有序字典中的值,popitem(self)是默认删除最后一个元素的值,实例如下: >>> dic = OrderedDict([('k1', 'v1'), ('k2', 'v2'), ('k3', 'v3'), ('k4', 'v4')]) >>> dic.pop("k2") 'v2' >>> dic OrderedDict([('k1', 'v1'), ('k3', 'v3'), ('k4', 'v4')]) 5.__repr__(self) def __repr__(self): 'od.__repr__() <==> repr(od)' if not self: return '%s()' % (self.__class__.__name__,) return '%s(%r)' % (self.__class__.__name__, list(self.items())) 6.__reduce__(self) def __reduce__(self): 'Return state information for pickling' inst_dict = vars(self).copy() for k in vars(OrderedDict()): inst_dict.pop(k, None) return self.__class__, (), inst_dict or None, None, iter(self.items()) 7.copy(self) def copy(self): 'od.copy() -> a shallow copy of od' return self.__class__(self) 8.fromkeys(cls,iterable,value=None) def fromkeys(cls, iterable, value=None): '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S. If not specified, the value defaults to None. ''' self = cls() for key in iterable: self[key] = value return self 9.__eq__(self,other) def __eq__(self, other): '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive while comparison to a regular mapping is order-insensitive. ''' if isinstance(other, OrderedDict): return dict.__eq__(self, other) and all(map(_eq, self, other)) return dict.__eq__(self, other) 三、defaultdict(默认字典) class defaultdict(dict): """ defaultdict(default_factory[, ...]) --> dict with default factory The default factory is called without arguments to produce a new value when a key is not present, in __getitem__ only. A defaultdict compares equal to a dict with the same items. All remaining arguments are treated the same as if they were passed to the dict constructor, including keyword arguments. 
""" class defaultdict(dict)可以看出,默认字典defaultdict也是继承字典中的方法,下面来看看默认字典中提供的方法: 1.copy(self) def copy(self): # real signature unknown; restored from __doc__ """ D.copy() -> a shallow copy of D. """ pass copy(self)复制默认字典。 2.__copy__(self,*args,**kwargs) def __copy__(self, *args, **kwargs): # real signature unknown """ D.copy() -> a shallow copy of D. """ pass 3.__getattrubute__(self,*args,**kwargs) def __getattribute__(self, *args, **kwargs): # real signature unknown """ Return getattr(self, name). """ pass 4.__missing__(self,key) def __missing__(self, key): # real signature unknown; restored from __doc__ """ __missing__(key) # Called by __getitem__ for missing key; pseudo-code: if self.default_factory is None: raise KeyError((key,)) self[key] = value = self.default_factory() return value """ pass 5.__reduce__(self,*args,**kwargs) def __reduce__(self, *args, **kwargs): # real signature unknown """ Return state information for pickling. """ pass 6.__repr__(self,*args,**kwargs) def __repr__(self, *args, **kwargs): # real signature unknown """ Return repr(self). """ pass 下面来看一个实例,假如我们要把一个列表[11,22,33,44,66,55,77,88,99,90]中大于60的元素放在键值为"k1",小于60的元素放在键值为"k2"的字 典中,我们怎样来实现,一种是正常实现,一种是使用默认字典来实现,实例如下: 方法一:默认字典(defaultdict)
from collections import defaultdict values = [11,22,33,44,55,77,66,88,99,90] dic = defaultdict(list) for value in values: if value >= 60: dic["k1"].append(value) else: dic["k2"].append(value) print(dic) 运行结果如下: defaultdict(<class 'list'>, {'k1': [77, 66, 88, 99, 90], 'k2': [11, 22, 33, 44, 55]}) 方法二: values = [11,22,33,44,55,88,66,77,99,90] dic = {} for value in values: if value >= 60: if "k1" in dic.keys(): dic["k1"].append(value) else: dic["k1"] = [value,] else: if "k2" in dic.keys(): dic["k2"].append(value) else: dic["k2"] = [value,] print(dic) 这个方法是我们首先定义一个空的字典,然后判断这个字典的键值中是否包含键"k1",如果不包含,我们就自己定义一个键-值对,创建键"k1"和 值列表,这样我们就不用额外定义空的列表,只需要加个判断即可。 四、可命名元组(namedtuple) 根据namedtuple可以创建一个包含tuple所有功能以及其他功能的类型。
import collections
Mytuple = collections.namedtuple('Mytuple', ['x', 'y', 'z'])
namedtuple()是一个工厂函数:调用它会创建并返回一个新的tuple子类(例如上面的Mytuple),这个类的实例既可以像普通元组一样按索引访问,也可以按字段名访问元素,常用来表示坐标,例如x、y、z三个坐标轴。
下面我们来看一个实例:
import collections #导入模块collections
MytupleClass = collections.namedtuple("MytupleClass",["x","y","z"])
obj = MytupleClass(11,22,33)
obj.x
11
obj.y
22
obj.z
33
五、deque(双向队列或双端队列)
其实发现collections就是对基础模块的一个补充,前面我们学习过元组,列表,字典等,在collections模块中,OrderedDict、defaultdict是对字典的操作,而namedtuple()是对元组功能的补充,我们这里学习的deque是对列表的操作。
下面我们来看看deque()中都有哪些方法,以及如何对列表进行补充操作。
import collections
l1 = collections.deque() class deque(object): """ deque([iterable[, maxlen]]) --> deque object A list-like sequence optimized for data accesses near its endpoints. """
1.append(self,*args,**kwargs)
def append(self, *args, **kwargs): # real signature unknown """ Add an element to the right side of the deque. """ pass
append(self,*args,**kwargs)是向deque双向队列右侧添加元素(add an element to the right side of the deque),下面来看一个例子:
>>> from collections import deque
#从模块collections中导入deque双向队列,双向队列是对队列方法的补充 >>> l1 = deque() >>> l1.append("tom") >>> l1.appendleft("aoi") >>> l1.append("gg") >>> l1 deque(['aoi', 'tom', 'gg']) 从上面代码可以看出,append(object)是向列表末尾添加元素,我们都知道,append()是向列表末尾追加元素。
2.appendleft(self,*args,**kwargs)
def appendleft(self, *args, **kwargs): # real signature unknown """ Add an element to the left side of the deque. """ pass
我们知道,deque(双向队列或双端队列)是双向队列,可以从两端分别存入和拿取元素,不像单向队列一样,只能一边进出,双向队列是从两端都可以进出,我们知道append(self,value)是向末尾(右侧)添加元素,appendleft(self,object)是向列表的左端添加元素,实例如下:
>>> l1 = deque(['aoi', 'tom', 'gg']) >>> l1.appendleft("divad") >>> l1.appendleft("alex") >>> l1.appendleft("sb") >>> l1 deque(['sb', 'alex', 'divad', 'aoi', 'tom', 'gg']) 3.clear(self,*args,**kwargs)
def clear(self, *args, **kwargs): # real signature unknown """ Remove all elements from the deque. """ pass
clear(self)是清除双向队列deque中的元素,实例如下:
>>> l2 = deque(['sb', 'alex', 'divad', 'aoi', 'tom', 'gg']) >>> l2.clear() >>> l2 deque([])
我们可以看出,使用clear(self)清除了双向队列deque中的元素。 4.copy(self,*args,**kwargs) def copy(self, *args, **kwargs): # real signature unknown """ Return a shallow copy of a deque. """ pass
5.count(self,value)
def count(self, value): # real signature unknown; restored from __doc__ """ D.count(value) -> integer -- return number of occurrences of value """
"""统计队列中某个元素的个数""" return 0
count(self,value)是统计队列中某个元素的个数,实例如下:
>>> l1 = deque(['sb', 'alex', 'divad', 'aoi', 'tom', 'gg', 'sb']) >>> l1.count("sb") 2 >>> l1.count("divad") 1 >>> l1.count("geng") 0 count(self,value)是统计双向队列deque中值value的个数。
6.extend(self,*args,**kwargs)
def extend(self, *args, **kwargs): # real signature unknown """ Extend the right side of the deque with elements from the iterable """ pass
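>>> l1 = deque(['sb', 'alex', 'divad', 'aoi', 'mazzy', 'cang']) >>> l1.extendleft(l2) >>> l1 deque(['is', 'sb', 'sb', 'alex', 'divad', 'aoi', 'mazzy', 'cang']) 注意:上面的例子实际调用的是extendleft(l2)(这里假设l2为deque(['sb', 'is'])),元素被逐个添加到左侧,所以顺序与l2相反。而extend(self,*args,**kwargs)是向列表的右侧依次添加可迭代对象中的元素,例如l1.extend(['x', 'y'])会把'x'、'y'追加到队列末尾。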
7.extendleft(self,*args,**kwargs)
def extendleft(self, *args, **kwargs): # real signature unknown """ Extend the left side of the deque with elements from the iterable """ pass
extendleft(self,*args,**kwargs)是向列表的左侧扩展对象。扩展就相当于多个元素的添加,可以以一个列表或者元组的形式添加元素,扩展性添加元素。需要注意的是,元素是逐个插入到左端的,因此它们在deque中的顺序与传入的可迭代对象中的顺序相反。
8.index(self,value,start=None,stop=None)
def index(self, value, start=None, stop=None): # real signature unknown; restored from __doc__ """ D.index(value, [start, [stop]]) -> integer -- return first index of value. Raises ValueError if the value is not present. """ return 0
index(self,value,start=None,stop=None)是查找元素在双向队列deque中的位置索引。我们经常需要查找一个元素在列表中的位置,由于双向队列也是队列,也是有序的,是有索引位置的,因此当我们想向deque中添加元素的时候,如果是要往指定的位置添加,就需要找到这个元素的位置索引,index(self,value,start=None,stop=None)。实例如下:
>>> l1 = deque(['is', 'sb', 'sb', 'alex', 'divad', 'aoi', 'mazzy', 'cang', 'oboma', 'bushi']) >>> l1.index("aoi") 5 >>> l1.index("sb") 1 上面代码中,我们可以查找指定元素的索引,当deque中有多个相同的元素的时候,默认是查找第一个位置的索引,如果想查找之后的,就需要指定查找的起始位置,例如l1.index("sb", 2)会返回2。
9.insert(self,index,p_object)
def insert(self, index, p_object): # real signature unknown; restored from __doc__ """ D.insert(index, object) -- insert object before index """ pass
insert(self,index,p_object)是向列表中指定的位置索引插入元素,经常与index(self,value,start=None,stop=None)结合使用,一个是向指定位置插入元素,一个是查找指定元素的索引位置。实例如下:
例如,我们想要向"aoi"所在的位置插入一个元素"Alexsb",那么首先就需要找到"aoi"在deque中的位置索引,然后在这个位置插入元素即可:
>>> l1 = deque(['is', 'sb', 'sb', 'alex', 'divad', 'aoi', 'mazzy', 'cang', 'oboma', 'bushi']) >>> index_num = l1.index("aoi")
#查找"aoi"在列表中的位置索引 >>> l1.insert(index_num,"Alexsb")
"""在“aoi”的位置索引处插入元素"Alexsb"""" >>> l1 deque(['is', 'sb', 'sb', 'alex', 'divad', 'Alexsb', 'aoi', 'mazzy', 'cang', 'oboma', 'bushi'])
从上面代码处可以看出,我们首先找到插入元素位置的索引,然后使用insert(index,value)向指定位置索引处插入了一个元素。
10.pop(self,*args,**kwargs)
def pop(self, *args, **kwargs): # real signature unknown """ Remove and return the rightmost element. """ pass pop(self,*args,**kwargs)删除列表末尾的元素,弹出列表的末尾元素,并且可以把弹出的值赋给一个变量来接收。
11.popleft(self,*args,**kwargs)
def popleft(self, *args, **kwargs): # real signature unknown """ Remove and return the leftmost element. """ pass
popleft(self)是从列表的左侧弹出元素,我们知道deque是双向队列,因此可以从左右两边都添加,删除元素。实例如下:
>>> l1 = deque(['is', 'sb', 'sb', 'alex', 'divad', 'Alexsb', 'aoi', 'mazzy', 'cang', 'oboma', 'bushi']) >>> l1.pop() 'bushi' >>> l1.pop() 'oboma' >>> l1.popleft() 'is' >>> l1.popleft() 'sb'
从上面代码我们可以看出,pop(self)是从列表的右侧末尾删除元素,而popleft(self)是从列表左侧删除元素。
12.remove(self,value)
def remove(self, value): # real signature unknown; restored from __doc__ """ D.remove(value) -- remove first occurrence of value. """ pass remove(self,value)是从列表中删除指定的值,pop()、popleft()是从列表的右端和左端删除元素,不需要参数,而remove(self,value)是删除指定的值,我们知道,remove(value)是移除指定的值。
>>> l1 = deque(['sb', 'alex', 'divad', 'Alexsb', 'aoi', 'mazzy', 'cang']) >>> l1.remove("cang") >>> l1 deque(['sb', 'alex', 'divad', 'Alexsb', 'aoi', 'mazzy'])
上面代码中,我们使用remove()删除"cang",可以看出,我们删除deque双向队列的元素,要知道是按照什么删除的。
13.reverse(self)
def reverse(self): # real signature unknown; restored from __doc__ """ D.reverse() -- reverse *IN PLACE* """ pass reverse(self)是倒置deque,把deque中的元素倒置,实例如下:
>>> l1 = deque(['sb', 'alex', 'divad', 'Alexsb', 'aoi', 'mazzy']) >>> l1.reverse() >>> l1 deque(['mazzy', 'aoi', 'Alexsb', 'divad', 'alex', 'sb'])
从上面代码可以看出,我们将队列中的元素进行了倒置。
14.rotate(self,*args,**kwargs)
def rotate(self, *args, **kwargs): # real signature unknown """ Rotate the deque n steps to the right (default n=1). If n is negative, rotates left. """ pass rotate(self,*args,**kwargs)是将deque双向队列中的元素进行移位,把最后的元素移到deque双向队列的前面,实例如下:
>>> l1 = deque(['mazzy', 'aoi', 'Alexsb', 'divad', 'alex', 'sb']) >>> l1.rotate(1) >>> l1 deque(['sb', 'mazzy', 'aoi', 'Alexsb', 'divad', 'alex']) >>> l1.rotate(2) >>> l1 deque(['divad', 'alex', 'sb', 'mazzy', 'aoi', 'Alexsb'])
从上面代码可以看出,我们是将双向队列deque中的元素进行移位,是把后面的元素移到前面,进行顺移;rotate(n)中的n表示向右移动的步数(默认为1),n为负数时则向左移动。
15.__add__(self,*args,**kwargs)
def __add__(self, *args, **kwargs): # real signature unknown """ Return self+value. """ pass 16.__bool__(self,*args,**kwargs)
def __bool__(self, *args, **kwargs): # real signature unknown """ self != 0 """ pass __bool__(self,*args,**kwargs)是判断列表是否为空(self != 0),下面来看一个实例:
deque(['sb', 'is']) >>> l2.__bool__() True >>> l2.clear() >>> l2 deque([]) >>> l2.__bool__() False 从上面代码可以看出,__bool__(self,*args,**kwargs)是判断deque是否为空。
17.__contains__(self,*args,**kwargs)
def __contains__(self, *args, **kwargs): # real signature unknown """ Return key in self. """ pass __contains__(self,value)判断值是否在deque双向队列中。
18.__copy__(self,*args,**kwargs)
def __copy__(self, *args, **kwargs): # real signature unknown """ Return a shallow copy of a deque. """ pass 19.__delitem__(self,*args,**kwargs)
def __delitem__(self, *args, **kwargs): # real signature unknown """ Delete self[key]. """ pass
20.__eq__(self,*args,**kwargs)
def __eq__(self, *args, **kwargs): # real signature unknown """ Return self==value. """ pass
21.__getattribute__(self,*args,**kwargs)
def __getattribute__(self, *args, **kwargs): # real signature unknown """ Return getattr(self, name). """ pass
22.__getitem__(self,*args,**kwargs)
def __getitem__(self, *args, **kwargs): # real signature unknown """ Return self[key]. """ pass
__getitem__(self,index)我们知道,列表是有序的,因此使用__getitem__(index)时只需告诉deque双向队列索引位置即可,这跟字典是不一样的,字典是根据键来查找值,而deque双向队列是使用位置索引查找值。实例如下:
>>> l1 = deque(['divad', 'alex', 'sb', 'mazzy', 'aoi', 'Alexsb']) >>> l1.__getitem__(1) 'alex'
23.__ge__(self,*args,**kwargs)
def __ge__(self, *args, **kwargs): # real signature unknown """ Return self>=value. """ pass
__ge__(self,*args,**kwargs)是单词Greater than or equal to的缩写,表示的含义是大于等于(>=)。
24.__gt__(self,*args,**kwargs)
def __gt__(self, *args, **kwargs): # real signature unknown """ Return self>value. """ pass
__gt__(self,*args,**kwargs)是单词Greater than的缩写,表示的含义是大于的意思。
25.__le__(self,*args,**kwargs)
def __le__(self, *args, **kwargs): # real signature unknown """ Return self<=value. """ pass
__le__(self,*args,**kwargs)是单词less than or equal to 的缩写,表示的含义是小于等于(<=)。
26.__lt__(self,*args,**kwargs)
def __lt__(self, *args, **kwargs): # real signature unknown """ Return self<value. """ pass
__lt__(self,*args,**kwargs)是单词less than的缩写,表示的含义是小于(<)。
27.__ne__(self,*args,**kwargs)
def __ne__(self, *args, **kwargs): # real signature unknown """ Return self!=value. """ pass
__ne__(self,*args,**kwargs)是单词not equal的缩写,表示的含义是(!=)不等于。
28.__iadd__(self,*args,**kwargs)
def __iadd__(self, *args, **kwargs): # real signature unknown """ Implement self+=value. """ pass
__iadd__(self,*args,**kwargs)实现的是self += value,即就地拼接:把value中的元素依次追加到deque的右端。
29.__imul__(self,*args,**kwargs)
def __imul__(self, *args, **kwargs): # real signature unknown """ Implement self*=value. """ pass 30.__init__(self,*args,**kwargs)
def __init__(self, iterable=(), maxlen=None): # known case of _collections.deque.__init__ """ deque([iterable[, maxlen]]) --> deque object A list-like sequence optimized for data accesses near its endpoints. # (copied from class doc) """ pass
31.__iter__(self)
def __iter__(self, *args, **kwargs): # real signature unknown """ Implement iter(self). """ pass
32.__len__(self,*args,**kwargs)
def __len__(self, *args, **kwargs): # real signature unknown """ Return len(self). """ pass
33.__mul__(self,*args,**kwargs)
def __mul__(self, *args, **kwargs): # real signature unknown """ Return self*value.n """ pass
34.__new__(*args,**kwargs)
def __new__(*args, **kwargs): # real signature unknown """ Create and return a new object. See help(type) for accurate signature. """ pass 35.__reduce__(self,*args,**kwargs)
def __reduce__(self, *args, **kwargs): # real signature unknown """ Return state information for pickling. """ pass 36.__repr__(self,*args,**kwargs)
def __repr__(self, *args, **kwargs): # real signature unknown """ Return repr(self). """ pass 37.__reversed__(self,*args,**kwargs)
def __reversed__(self): # real signature unknown; restored from __doc__ """ D.__reversed__() -- return a reverse iterator over the deque """ pass
38.__rmul__(self,*args,**kwargs)
def __rmul__(self, *args, **kwargs): # real signature unknown """ Return self*value. """ pass
39.__setitem__(self,*args,**kwargs)
def __setitem__(self, *args, **kwargs): # real signature unknown """ Set self[key] to value. """ pass
40.__sizeof__(self)
def __sizeof__(self): # real signature unknown; restored from __doc__ """ D.__sizeof__() -- size of D in memory, in bytes """ pass
六、Queue(单向队列)
双向队列(deque)就是两端都可以进行添加和删除操作的队列,下面来看看单向队列queue和列表有什么区别。
import queue
q = queue.Queue()
class Queue: '''Create a queue object with a given maximum size. If maxsize is <= 0, the queue size is infinite. '''
下面来看看单向队列都有哪些方法:
1.__init__(self,maxsize=0)
def __init__(self, maxsize=0): self.maxsize = maxsize self._init(maxsize) # mutex must be held whenever the queue is mutating. All methods # that acquire mutex must release it before returning. mutex # is shared between the three conditions, so acquiring and # releasing the conditions also acquires and releases mutex. self.mutex = threading.Lock() # Notify not_empty whenever an item is added to the queue; a # thread waiting to get is notified then. self.not_empty = threading.Condition(self.mutex) # Notify not_full whenever an item is removed from the queue; # a thread waiting to put is notified then. self.not_full = threading.Condition(self.mutex) # Notify all_tasks_done whenever the number of unfinished tasks # drops to zero; thread waiting to join() is notified to resume self.all_tasks_done = threading.Condition(self.mutex) self.unfinished_tasks = 0
2.task_done(self)
def task_done(self): '''Indicate that a formerly enqueued task is complete. Used by Queue consumer threads. For each get() used to fetch a task, a subsequent call to task_done() tells the queue that the processing on the task is complete. If a join() is currently blocking, it will resume when all items have been processed (meaning that a task_done() call was received for every item that had been put() into the queue). Raises a ValueError if called more times than there were items placed in the queue. ''' with self.all_tasks_done: unfinished = self.unfinished_tasks - 1 if unfinished <= 0: if unfinished < 0: raise ValueError('task_done() called too many times') self.all_tasks_done.notify_all() self.unfinished_tasks = unfinished
3.join(self) def join(self): '''Blocks until all items in the Queue have been gotten and processed. The count of unfinished tasks goes up whenever an item is added to the queue. The count goes down whenever a consumer thread calls task_done() to indicate the item was retrieved and all work on it is complete. When the count of unfinished tasks drops to zero, join() unblocks. ''' 4.qsize(self) def qsize(self): '''Return the approximate size of the queue (not reliable!).''' with self.mutex: return self._qsize() qsize(self)用于查看Queue里面元素的个数。 >>> q1 = Queue() >>> q1.put("tom") >>> q1.put(123) >>> q1.qsize() 2 5.empty(self) def empty(self): '''Return True if the queue is empty, False otherwise (not reliable!). This method is likely to be removed at some point. Use qsize() == 0 as a direct substitute, but be aware that either approach risks a race condition where a queue can grow before the result of empty() or qsize() can be used. To create code that needs to wait for all queued tasks to be completed, the preferred technique is to use the join() method. ''' with self.mutex: return not self._qsize() empty(self)是判断一个Queue是否是空的单向队列,如果是空就返回True,否则返回False. >>> q1.qsize() 2 >>> q1.empty() False >>> q2 = Queue() >>> q2.empty() True 6.full(self) def full(self): '''Return True if the queue is full, False otherwise (not reliable!). This method is likely to be removed at some point. Use qsize() >= n as a direct substitute, but be aware that either approach risks a race condition where a queue can shrink before the result of full() or qsize() can be used. ''' with self.mutex: return 0 < self.maxsize <= self._qsize() full(self)我们可以定义一个Queue的长度,规定有多少个元素,而full(self)是用来判断这个Queue是否满了。 7.put(self,item,block=True,timeout=None) def put(self, item, block=True, timeout=None): '''Put an item into the queue. ...''' put(self,item,block=True,timeout=None)是向Queue的末尾放入一个元素,队列已满时默认会阻塞等待。 8.get(self,block=True,timeout=None) def get(self, block=True, timeout=None): '''Remove and return an item from the queue. ...'''
get(self,block=True,timeout=None)是从Queue中取出数据,不需要指定要取的元素,只能按照存入的顺序依次取出,即遵循先进先出(FIFO)的原则。 9.put_nowait(self,item) def put_nowait(self, item): '''Put an item into the queue without blocking. Only enqueue the item if a free slot is immediately available. Otherwise raise the Full exception. ''' return self.put(item, block=False) put_nowait(self,item)是不阻塞地向Queue中放入元素,相当于put(item, block=False),如果队列已满,会立即抛出Full异常。 10.get_nowait(self) def get_nowait(self): '''Remove and return an item from the queue without blocking. Only get an item if one is immediately available. Otherwise raise the Empty exception. ''' return self.get(block=False) 11._init(self,maxsize) def _init(self, maxsize): self.queue = deque() 12._qsize(self) def _qsize(self): return len(self.queue) # Put a new item in the queue 13._put(self,item) def _put(self, item): self.queue.append(item) # Get an item from the queue 14._get(self) def _get(self): return self.queue.popleft()