标签:
Set集合:
Set 是一个无序、不允许有重复元素的集合,底层和字典一样基于哈希表(Hash 表)实现。创建一个空的 set 对象:set()(注意:{} 创建的是空字典,不是空集合)。
应用(网络爬虫) 优点:访问速度快、解决重复问题
每个set对象都有以下功能:
class set(object):
    """
    set() -> new empty set object
    set(iterable) -> new set object

    Build an unordered collection of unique elements.

    This is an IDE-generated stub of the built-in type: every method body
    is ``pass`` and exists only to document the API.
    """

    def add(self, *args, **kwargs):  # real signature unknown
        """
        Add an element to a set.

        This has no effect if the element is already present.
        """
        pass

    def clear(self, *args, **kwargs):  # real signature unknown
        """ Remove all elements from this set. """
        pass

    def copy(self, *args, **kwargs):  # real signature unknown
        """ Return a shallow copy of a set. """
        pass

    def difference(self, *args, **kwargs):  # real signature unknown
        """
        Return the difference of two or more sets as a new set.

        (i.e. all elements that are in this set but not the others.)
        """
        pass

    def difference_update(self, *args, **kwargs):  # real signature unknown
        """
        Remove all elements of another set from this set.

        The set is modified in place; nothing is returned.
        """
        pass

    def discard(self, *args, **kwargs):  # real signature unknown
        """
        Remove an element from a set if it is a member.

        If the element is not a member, do nothing.
        """
        pass

    def intersection(self, *args, **kwargs):  # real signature unknown
        """
        Return the intersection of two or more sets as a new set.

        (i.e. elements that are common to all of the sets.)
        """
        pass

    def intersection_update(self, *args, **kwargs):  # real signature unknown
        """
        Update a set with the intersection of itself and another.

        The set is modified in place instead of creating a new one.
        """
        pass

    def isdisjoint(self, *args, **kwargs):  # real signature unknown
        """ Return True if two sets have a null intersection. """
        pass

    def issubset(self, *args, **kwargs):  # real signature unknown
        """ Report whether another set contains this set. """
        pass

    def issuperset(self, *args, **kwargs):  # real signature unknown
        """ Report whether this set contains another set. """
        pass

    def pop(self, *args, **kwargs):  # real signature unknown
        """
        Remove and return an arbitrary set element.

        Raises KeyError if the set is empty.
        """
        pass

    def remove(self, *args, **kwargs):  # real signature unknown
        """
        Remove an element from a set; it must be a member.

        If the element is not a member, raise a KeyError.
        """
        pass

    def symmetric_difference(self, *args, **kwargs):  # real signature unknown
        """
        Return the symmetric difference of two sets as a new set.

        (i.e. all elements that are in exactly one of the sets.)
        """
        pass

    def symmetric_difference_update(self, *args, **kwargs):  # real signature unknown
        """
        Update a set with the symmetric difference of itself and another.

        The set is modified in place instead of creating a new one.
        """
        pass

    def union(self, *args, **kwargs):  # real signature unknown
        """
        Return the union of sets as a new set.

        (i.e. all elements that are in either set.)
        """
        pass

    def update(self, *args, **kwargs):  # real signature unknown
        """ Update a set with the union of itself and others. """
        pass

    def __and__(self, y):  # real signature unknown; restored from __doc__
        """ x.__and__(y) <==> x&y """
        pass

    def __cmp__(self, y):  # real signature unknown; restored from __doc__
        """ x.__cmp__(y) <==> cmp(x,y) """
        pass

    def __contains__(self, y):  # real signature unknown; restored from __doc__
        """ x.__contains__(y) <==> y in x. """
        pass

    def __eq__(self, y):  # real signature unknown; restored from __doc__
        """ x.__eq__(y) <==> x==y """
        pass

    def __getattribute__(self, name):  # real signature unknown; restored from __doc__
        """ x.__getattribute__('name') <==> x.name """
        pass

    def __ge__(self, y):  # real signature unknown; restored from __doc__
        """ x.__ge__(y) <==> x>=y """
        pass

    def __gt__(self, y):  # real signature unknown; restored from __doc__
        """ x.__gt__(y) <==> x>y """
        pass

    def __iand__(self, y):  # real signature unknown; restored from __doc__
        """ x.__iand__(y) <==> x&=y """
        pass

    def __init__(self, seq=()):  # known special case of set.__init__
        """
        set() -> new empty set object
        set(iterable) -> new set object

        Build an unordered collection of unique elements.
        # (copied from class doc)
        """
        pass

    def __ior__(self, y):  # real signature unknown; restored from __doc__
        """ x.__ior__(y) <==> x|=y """
        pass

    def __isub__(self, y):  # real signature unknown; restored from __doc__
        """ x.__isub__(y) <==> x-=y """
        pass

    def __iter__(self):  # real signature unknown; restored from __doc__
        """ x.__iter__() <==> iter(x) """
        pass

    def __ixor__(self, y):  # real signature unknown; restored from __doc__
        """ x.__ixor__(y) <==> x^=y """
        pass

    def __len__(self):  # real signature unknown; restored from __doc__
        """ x.__len__() <==> len(x) """
        pass

    def __le__(self, y):  # real signature unknown; restored from __doc__
        """ x.__le__(y) <==> x<=y """
        pass

    def __lt__(self, y):  # real signature unknown; restored from __doc__
        """ x.__lt__(y) <==> x<y """
        pass

    @staticmethod  # known case of __new__
    def __new__(S, *more):  # real signature unknown; restored from __doc__
        """ T.__new__(S, ...) -> a new object with type S, a subtype of T """
        pass

    def __ne__(self, y):  # real signature unknown; restored from __doc__
        """ x.__ne__(y) <==> x!=y """
        pass

    def __or__(self, y):  # real signature unknown; restored from __doc__
        """ x.__or__(y) <==> x|y """
        pass

    def __rand__(self, y):  # real signature unknown; restored from __doc__
        """ x.__rand__(y) <==> y&x """
        pass

    def __reduce__(self, *args, **kwargs):  # real signature unknown
        """ Return state information for pickling. """
        pass

    def __repr__(self):  # real signature unknown; restored from __doc__
        """ x.__repr__() <==> repr(x) """
        pass

    def __ror__(self, y):  # real signature unknown; restored from __doc__
        """ x.__ror__(y) <==> y|x """
        pass

    def __rsub__(self, y):  # real signature unknown; restored from __doc__
        """ x.__rsub__(y) <==> y-x """
        pass

    def __rxor__(self, y):  # real signature unknown; restored from __doc__
        """ x.__rxor__(y) <==> y^x """
        pass

    def __sizeof__(self):  # real signature unknown; restored from __doc__
        """ S.__sizeof__() -> size of S in memory, in bytes """
        pass

    def __sub__(self, y):  # real signature unknown; restored from __doc__
        """ x.__sub__(y) <==> x-y """
        pass

    def __xor__(self, y):  # real signature unknown; restored from __doc__
        """ x.__xor__(y) <==> x^y """
        pass

    # Sets are mutable, so instances are deliberately unhashable.
    __hash__ = None
(1) add()添加一个元素 clear():清空元素
# Create an empty set and add a single element to it.
s1 = set()
s1.add('alex')
print(s1)
# Output:
# {'alex'}
(2) difference():返回一个新的 set,包含在当前 set 中但不在参数集合中的元素,原 set 不变
# difference() returns a NEW set and leaves the original set untouched.
s2 = {'alex', 'eric', 'tony'}
s3 = s2.difference(['alex', 'eric'])
print(s3)
# Output:
# {'tony'}
(3)difference_update():删除当前set中的所有包含在参数集合里的元素
# difference_update() removes, in place, every element that also appears
# in the argument. It returns None, so s4 is None.
s2 = {'alex', 'eric', 'tony'}  # must be a set: a list has no difference_update()
print(s2)
s4 = s2.difference_update(['alex', 'eric'])
print(s2)
print(s4)
# Output (element order of the first line may vary):
# {'alex', 'tony', 'eric'}
# {'tony'}
# None
(4)intersection():取交集,新创建一个 set。 isdisjoint():如果两个集合没有交集,返回 True。 issubset():判断当前 set 是否是参数集合的子集。
(5)pop():随机移除一个元素并返回该元素;集合为空时抛出 KeyError
# pop() removes an arbitrary element from the set and returns it.
s2 = {'alex', 'eric', 'tony'}  # must be a set; a list's pop() always takes the last item
print(s2)
ret = s2.pop()
print(ret)  # the removed element; which one is not predictable
print(s2)   # the two elements that remain
# Example output (the popped element and the order may differ):
# {'alex', 'tony', 'eric'}
# tony
# {'alex', 'eric'}
(6)remove():只移除指定的元素,不会返回被移除的值(返回 None);如果元素不存在则抛出 KeyError
# remove() deletes the given element in place and returns None.
s2 = {'alex', 'eric', 'tony'}
print(s2)
ret = s2.remove('tony')
print(ret)  # None: remove() does not hand back the removed element
print(s2)
# Output (element order may vary):
# {'alex', 'tony', 'eric'}
# None
# {'alex', 'eric'}
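(7)用集合运算比较新旧两份数据:intersection() 取交集,即两个字典里都有的键,对应的数据可能不变,也可能需要更新;difference() 取差集:old.difference(new) 得到原来有、新的没有的键(要删除),new.difference(old) 得到原来没有、新的有的键(要增加)。注意 symmetric_difference() 是对称差集(只属于其中一个集合的元素);下面的例子里交集分别是 old 和 new 的子集,所以对交集取 symmetric_difference() 与取 difference() 的结果相同。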
# Records currently stored in the database.
old_dict = {
    "#1": {'hostname': 'c1', 'cpu_count': 2, 'mem_capicity': 80},
    "#2": {'hostname': 'c1', 'cpu_count': 2, 'mem_capicity': 80},
    "#3": {'hostname': 'c1', 'cpu_count': 2, 'mem_capicity': 80},
}

# Data newly reported by the CMDB.
new_dict = {
    "#1": {'hostname': 'c1', 'cpu_count': 2, 'mem_capicity': 800},
    "#3": {'hostname': 'c1', 'cpu_count': 2, 'mem_capicity': 80},
    "#4": {'hostname': 'c2', 'cpu_count': 2, 'mem_capicity': 80},
}

old = set(old_dict)
new = set(new_dict)

# Keys present on both sides: candidates for an update.
updateset = old.intersection(new)
# Keys only in the old data: to be deleted. A plain difference states the
# intent directly; symmetric_difference against the intersection gives the
# same result only because the intersection is a subset of `old`.
delete_set = old.difference(new)
# Keys only in the new data: to be added.
add_set = new.difference(old)

print(updateset)
print(delete_set)
print(add_set)
# Output (element order of the first line may vary):
# {'#1', '#3'}
# {'#2'}
# {'#4'}
collections 系列
一、计数器(Counter)
Counter是对字典类型的补充,用于追踪值的出现次数。
具备字典的所有功能和自己的功能:
具备的功能:
########################################################################
###  Counter
########################################################################

class Counter(dict):
    '''Dict subclass for counting hashable items.  Sometimes called a bag
    or multiset.  Elements are stored as dictionary keys and their counts
    are stored as dictionary values.

    >>> c = Counter('abcdeabcdabcaba')  # count elements from a string

    >>> c.most_common(3)                # three most common elements
    [('a', 5), ('b', 4), ('c', 3)]
    >>> sorted(c)                       # list all unique elements
    ['a', 'b', 'c', 'd', 'e']
    >>> ''.join(sorted(c.elements()))   # list elements with repetitions
    'aaaaabbbbcccdde'
    >>> sum(c.values())                 # total of all counts
    15

    >>> c['a']                          # count of letter 'a'
    5
    >>> for elem in 'shazam':           # update counts from an iterable
    ...     c[elem] += 1                # by adding 1 to each element's count
    >>> c['a']                          # now there are seven 'a'
    7
    >>> del c['b']                      # remove all 'b'
    >>> c['b']                          # now there are zero 'b'
    0

    >>> d = Counter('simsalabim')       # make another counter
    >>> c.update(d)                     # add in the second counter
    >>> c['a']                          # now there are nine 'a'
    9

    >>> c.clear()                       # empty the counter
    >>> c
    Counter()

    Note:  If a count is set to zero or reduced to zero, it will remain
    in the counter until the entry is deleted or the counter is cleared:

    >>> c = Counter('aaabbc')
    >>> c['b'] -= 2                     # reduce the count of 'b' by two
    >>> c.most_common()                 # 'b' is still in, but its count is zero
    [('a', 3), ('c', 1), ('b', 0)]

    '''
    # References:
    #   http://en.wikipedia.org/wiki/Multiset
    #   http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
    #   http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
    #   http://code.activestate.com/recipes/259174/
    #   Knuth, TAOCP Vol. II section 4.6.3

    def __init__(self, iterable=None, **kwds):
        '''Create a new, empty Counter object.  And if given, count elements
        from an input iterable.  Or, initialize the count from another mapping
        of elements to their counts.

        >>> c = Counter()                           # a new, empty counter
        >>> c = Counter('gallahad')                 # a new counter from an iterable
        >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args

        '''
        super(Counter, self).__init__()
        self.update(iterable, **kwds)

    def __missing__(self, key):
        'The count of elements not in the Counter is zero.'
        # Needed so that self[missing_item] does not raise KeyError
        return 0

    def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        # items() (rather than the Python 2-only iteritems()) keeps this
        # method working on both Python 2 and Python 3.
        if n is None:
            return sorted(self.items(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.items(), key=_itemgetter(1))

    def elements(self):
        '''Iterator over elements repeating each as many times as its count.

        The result is a lazy iterator, not a collection.

        >>> c = Counter('ABCABC')
        >>> sorted(c.elements())
        ['A', 'A', 'B', 'B', 'C', 'C']

        # Knuth's example for prime factors of 1836:  2**2 * 3**3 * 17**1
        >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
        >>> product = 1
        >>> for factor in prime_factors.elements():     # loop over factors
        ...     product *= factor                       # and multiply them
        >>> product
        1836

        Note, if an element's count has been set to zero or is a negative
        number, elements() will ignore it.

        '''
        # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
        return _chain.from_iterable(_starmap(_repeat, self.items()))

    # Override dict methods where necessary

    @classmethod
    def fromkeys(cls, iterable, v=None):
        'Always raise NotImplementedError; use Counter(iterable) instead.'
        # There is no equivalent method for counters because setting v=1
        # means that no element can have a count greater than one.
        raise NotImplementedError(
            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')

    def update(self, iterable=None, **kwds):
        '''Like dict.update() but add counts instead of replacing them.

        Source can be an iterable, a dictionary, or another Counter instance.
        Elements not yet present are created; existing counts are increased
        (by one per occurrence for an iterable, by the given count for a
        mapping).

        >>> c = Counter('which')
        >>> c.update('witch')           # add elements from another iterable
        >>> d = Counter('watch')
        >>> c.update(d)                 # add elements from another counter
        >>> c['h']                      # four 'h' in which, witch, and watch
        4

        '''
        # The regular dict.update() operation makes no sense here because the
        # replace behavior results in the some of original untouched counts
        # being mixed-in with all of the other counts for a mismash that
        # doesn't have a straight-forward interpretation in most counting
        # contexts.  Instead, we implement straight-addition.  Both the inputs
        # and outputs are allowed to contain zero and negative counts.

        if iterable is not None:
            if isinstance(iterable, Mapping):
                if self:
                    self_get = self.get
                    for elem, count in iterable.items():
                        self[elem] = self_get(elem, 0) + count
                else:
                    super(Counter, self).update(iterable)  # fast path when counter is empty
            else:
                self_get = self.get
                for elem in iterable:
                    self[elem] = self_get(elem, 0) + 1
        if kwds:
            self.update(kwds)

    def subtract(self, iterable=None, **kwds):
        '''Like dict.update() but subtracts counts instead of replacing them.
        Counts can be reduced below zero.  Both the inputs and outputs are
        allowed to contain zero and negative counts.

        Source can be an iterable, a dictionary, or another Counter instance.

        >>> c = Counter('which')
        >>> c.subtract('witch')             # subtract elements from another iterable
        >>> c.subtract(Counter('watch'))    # subtract elements from another counter
        >>> c['h']                          # 2 in which, minus 1 in witch, minus 1 in watch
        0
        >>> c['w']                          # 1 in which, minus 1 in witch, minus 1 in watch
        -1

        '''
        if iterable is not None:
            self_get = self.get
            if isinstance(iterable, Mapping):
                for elem, count in iterable.items():
                    self[elem] = self_get(elem, 0) - count
            else:
                for elem in iterable:
                    self[elem] = self_get(elem, 0) - 1
        if kwds:
            self.subtract(kwds)

    def copy(self):
        'Return a shallow copy.'
        return self.__class__(self)

    def __reduce__(self):
        'Return a (class, (plain dict of counts,)) tuple for pickling.'
        return self.__class__, (dict(self),)

    def __delitem__(self, elem):
        'Like dict.__delitem__() but does not raise KeyError for missing values.'
        if elem in self:
            super(Counter, self).__delitem__(elem)

    def __repr__(self):
        'Return "Counter()" when empty, else the counts in most-common order.'
        if not self:
            return '%s()' % self.__class__.__name__
        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
        return '%s({%s})' % (self.__class__.__name__, items)

    # Multiset-style mathematical operations discussed in:
    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
    #       and at http://en.wikipedia.org/wiki/Multiset
    #
    # Outputs guaranteed to only include positive counts.
    #
    # To strip negative and zero counts, add-in an empty counter:
    #       c += Counter()

    def __add__(self, other):
        '''Add counts from two counters.

        >>> Counter('abbb') + Counter('bcc')
        Counter({'b': 4, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem, count in self.items():
            newcount = count + other[elem]
            if newcount > 0:
                result[elem] = newcount
        for elem, count in other.items():
            if elem not in self and count > 0:
                result[elem] = count
        return result

    def __sub__(self, other):
        ''' Subtract count, but keep only results with positive counts.

        >>> Counter('abbbc') - Counter('bccd')
        Counter({'b': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem, count in self.items():
            newcount = count - other[elem]
            if newcount > 0:
                result[elem] = newcount
        for elem, count in other.items():
            # A negative count on the right-hand side becomes positive.
            if elem not in self and count < 0:
                result[elem] = 0 - count
        return result

    def __or__(self, other):
        '''Union is the maximum of value in either of the input counters.

        >>> Counter('abbb') | Counter('bcc')
        Counter({'b': 3, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem, count in self.items():
            other_count = other[elem]
            newcount = other_count if count < other_count else count
            if newcount > 0:
                result[elem] = newcount
        for elem, count in other.items():
            if elem not in self and count > 0:
                result[elem] = count
        return result

    def __and__(self, other):
        ''' Intersection is the minimum of corresponding counts.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem, count in self.items():
            other_count = other[elem]
            newcount = count if count < other_count else other_count
            if newcount > 0:
                result[elem] = newcount
        return result
(1)Counter():分别计算各个字母出现的次数
import collections

# Count how many times each letter occurs in the string.
obj = collections.Counter('aabgoomabbblilifffgggljg')
print(obj)

# The four most frequent letters with their counts.
ret = obj.most_common(4)
print(ret)

# Output on Python 3.7+ (letters with equal counts keep first-seen order;
# older interpreters may order the ties differently):
# Counter({'g': 5, 'b': 4, 'a': 3, 'l': 3, 'f': 3, 'o': 2, 'i': 2, 'm': 1, 'j': 1})
# [('g', 5), ('b', 4), ('a', 3), ('l', 3)]
(2)
标签:
原文地址:http://www.cnblogs.com/Peony-Y/p/5180276.html