PythonCookbook - Data Structures and Algorithms
Nov 26, 2016 am 10:13 AMChapter 1 Data Structures and Algorithms
1.1 Decompose a sequence into separate variables
p = (4, 5) x, y = p print x print y data = [ 'ACME', 50, 91.1, (2012, 12, 21) ] name, shares, price, date = data print name print shares print price print date name, shares, price, (year, mon, day ) = data print year p = (4, 5) #x, y, z = p 错误!!! s = 'hello!' a, b, c, d, e, f = s print a print f data = [ 'ACME', 50, 91.1, (2012, 12, 21) ] _, shares, price, _ = data print shares print price #其他数据可以丢弃了
1.2 Decompose elements from an iterable of arbitrary length
from audioop import avg def drop_first_last(grades): first, *middle, last = grades return avg(middle) record = ('Dave', 'dave@example.com', '777-333-2323', '234-234-2345') name, email, *phone_numbers = record print name print email print phone_numbers *trailing, current = [10, 8, 7, 2, 5] print trailing #[10, 8, 7, 2, ] print current #5 records = [ ('foo', 1, 2), ('bar', 'hello'), ('foo', 5, 3) ] def do_foo(x, y): print ('foo', x, y) def do_bar(s): print ('bar', s) for tag, *args in records: if tag == 'foo': do_foo(*args) elif tag == 'bar': do_bar(*args) line = 'asdf:fedfr234://wef:678d:asdf' uname, *fields, homedir, sh = line.split(':') print uname print homedir record = ('ACME', 50, 123.45, (12, 18, 2012)) name, *_, (*_, year) = record print name print year items = [1, 10, 7, 4, 5, 9] head, *tail = items print head #1 print tail #[10, 7, 4, 5, 9] def sum(items): head, *tail = items return head + sum(tail) if tail else head sum(items)
1.3 Save the last N elements
from _collections import deque def search(lines, pattern, history=5): previous_lines = deque(maxlen = history) for line in lines: if pattern in line: yield line, previous_lines previous_lines.append(line) # Example use on a file if __name__ == '__main__': with open('somefile.txt') as f: for line, prevlines in search(f, 'python', 5): for pline in prevlines: print (pline) #print (pline, end='') print (line) #print (pline, end='') print ('-'*20) q = deque(maxlen=3) q.append(1) q.append(2) q.append(3) print q q.append(4) print q q = deque() q.append(1) q.append(2) q.append(3) print q q.appendleft(4) print q q_pop = q.pop() print q_pop print q q_popleft = q.popleft() print q_popleft print q
1.4 Find the largest or Minimum N elements
import heapq nums = [1,30,6,2,36,33,46,3,23,43] print (heapq.nlargest(3, nums)) print (heapq.nsmallest(3, nums)) portfolio = [ {'name':'IBM', 'shares':100, 'price':2.4}, {'name':'A', 'shares':1040, 'price':12.4}, {'name':'S', 'shares':40, 'price':23.4}, {'name':'D', 'shares':1, 'price':2.49}, {'name':'F', 'shares':9, 'price':24} ] cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price']) expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price']) print cheap print expensive nums = [1,8,2,23,7,-4,18,23,42,37,2] heap = list(nums) print heap heapq.heapify(heap) print heap print heapq.heappop(heap) print heapq.heappop(heap) print heapq.heappop(heap)
1.5 Implement priority queue
import heapq class PriorityQueue: def __init__(self): self._queue = [] self._index = 0 def push(self, item, priority): heapq.heappush(self._queue, (-priority, self._index, item)) self._index += 1 def pop(self): return heapq.heappop(self._queue)[-1] #Example class Item: def __init__(self, name): self.name = name def __repr__(self): return 'Item({!r})'.format(self.name) q = PriorityQueue() q.push(Item('foo'), 1) q.push(Item('spam'), 4) q.push(Item('bar'), 5) q.push(Item('grok'), 1) print q.pop() print q.pop() print q.pop() a = Item('foo') b = Item('bar') #a < b error a = (1, Item('foo')) b = (5, Item('bar')) print a < b c = (1, Item('grok')) #a < c error a = (1, 0, Item('foo')) b = (5, 1, Item('bar')) c = (1, 2, Item('grok')) print a < b print a < c
1.6 Map objects to multiple values in the dictionary
d = { 'a' : [1, 2, 3], 'b' : [4, 5] } e = { 'a' : {1, 2, 3}, 'b' : {4, 5} } from collections import defaultdict d = defaultdict(list) d['a'].append(1) d['a'].append(2) d['a'].append(3) print d d = defaultdict(set) d['a'].add(1) d['a'].add(2) d['a'].add(3) print d d = {} d.setdefault('a', []).append(1) d.setdefault('a', []).append(2) d.setdefault('b', []).append(3) print d d = {} for key, value in d:#pairs: if key not in d: d[key] = [] d[key].append(value) d = defaultdict(list) for key, value in d:#pairs: d[key].append(value)
1.7 Keep the dictionary in order
from collections import OrderedDict d = OrderedDict() d['foo'] = 1 d['bar'] = 2 d['spam'] = 3 d['grol'] = 4 for key in d: print (key, d[key]) import json json.dumps(d)
1.8 Computational issues related to dictionary
price = { 'ACME':23.45, 'IBM':25.45, 'FB':13.45, 'IO':4.45, 'JAVA':45.45, 'AV':38.38, } min_price = min( zip( price.values(), price.keys() ) ) print min_price max_price = max( zip( price.values(), price.keys() ) ) print max_price price_sorted = sorted( zip( price.values(), price.keys() ) ) print price_sorted price_and_names = zip( price.values(), price.keys() ) print (min(price_and_names)) #print (max(price_and_names)) error zip()创建了迭代器,内容只能被消费一次 print min(price) print max(price) print min(price.values()) print max(price.values()) print min(price, key = lambda k : price[k]) print max(price, key = lambda k : price[k]) min_value = price[ min(price, key = lambda k : price[k]) ] print min_value price = { 'AAA': 23, 'ZZZ': 23, } print min( zip( price.values(), price.keys() ) ) print max( zip( price.values(), price.keys() ) )
1.9 Find similarities in two dictionaries
a = { 'x':1, 'y':2, 'z':3 } b = { 'x':11, 'y':2, 'w':10 } print a.keys() & b.keys() #{'x','y'} print a.keys() - b.keys() #{'z'} print a.items() & b.items() #{('y', 2)} c = {key: a[key] for key in a.keys() - {'z', 'w'} } print c #{'x':1, 'y':2}
1.10 Remove duplicates from the sequence and keep the order of elements unchanged
def dedupe(items): seen = set() for item in items: if item not in seen: yield item seen.add(item) #example a = [1,5,2,1,9,1,5,10] print list(dedupe(a)) def dedupe2(items, key = None): seen = set() for item in items: val = item if key is None else key(item) if val not in seen: yield item seen.add(val) #example a = [ {'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}, ] print list( dedupe2(a, key=lambda d : (d['x'], d['y']) ) ) print list( dedupe2(a, key=lambda d : (d['x']) ) ) a = [1,5,2,1,9,1,5,10] print set(a)
1.11 Name the slices
items = [0,1,2,3,4,5,6] a = slice(2,4) print items[2:4] print items[a] items[a] = [10,11] print items print a.start print a.stop print a.step
1.12 Find the one that appears the most in the sequence Elements
words = [ 'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes', 'the', 'look' ] from collections import Counter word_counts = Counter(words) top_three = word_counts.most_common(3) print top_three print word_counts['look'] print word_counts['the'] morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes'] for word in morewords: word_counts[word] += 1 print word_counts['eyes'] print word_counts['why'] word_counts.update(morewords) print word_counts['eyes'] print word_counts['why'] a = Counter(words) b = Counter(morewords) print a print b c = a + b print c d = a - b print b
1.13 Sorting dictionary lists by common keys
rows = [ {'fname':'Brian', 'lname':'Jones', 'uid':1003}, {'fname':'David', 'lname':'Beazley', 'uid':1002}, {'fname':'John', 'lname':'Cleese', 'uid':1001}, {'fname':'Big', 'lname':'Jones', 'uid':1004} ] from operator import itemgetter rows_by_fname = sorted(rows, key=itemgetter('fname')) rows_by_uid = sorted(rows, key=itemgetter('uid')) print rows_by_fname print rows_by_uid rows_by_lfname = sorted(rows, key=itemgetter('lname', 'fname')) print rows_by_lfname rows_by_fname = sorted(rows, key=lambda r: r['fname']) rows_by_lfname = sorted(rows, key=lambda r: (r['fname'], r['lname'])) print rows_by_fname print rows_by_lfname print min(rows, key=itemgetter('uid')) print max(rows, key=itemgetter('uid'))
1.14 Sorting objects that do not natively support comparison operations
class User: def __init__(self, user_id): self.user_id = user_id def __repr__(self): return 'User({})'.format(self.user_id) users = [User(23), User(3), User(99)] print users print sorted(users, key = lambda u: u.user_id) from operator import attrgetter print sorted(users, key=attrgetter('user_id')) print min(users, key=attrgetter('user_id')) print max(users, key=attrgetter('user_id'))
1.15 Grouping records based on fields
rows = [ {'address':'5412 N CLARK', 'data':'07/01/2012'}, {'address':'5232 N CLARK', 'data':'07/04/2012'}, {'address':'5542 E 58ARK', 'data':'07/02/2012'}, {'address':'5152 N CLARK', 'data':'07/03/2012'}, {'address':'7412 N CLARK', 'data':'07/02/2012'}, {'address':'6789 w CLARK', 'data':'07/03/2012'}, {'address':'9008 N CLARK', 'data':'07/01/2012'}, {'address':'2227 W CLARK', 'data':'07/04/2012'} ] from operator import itemgetter from itertools import groupby rows.sort(key=itemgetter('data')) for data, items in groupby(rows, key=itemgetter('data')): print (data) for i in items: print (' ', i) from collections import defaultdict rows_by_date = defaultdict(list) for row in rows: rows_by_date[row['data']].append(row) for r in rows_by_date['07/04/2012']: print(r)
1.16 Filtering elements in a sequence
mylist = [1,4,-5,10,-7,2,3,-1] print [n for n in mylist if n > 0]#列表推导式 print [n for n in mylist if n < 0] pos = (n for n in mylist if n > 0)#生成器表达式 print pos for x in pos: print(x) values = ['1', '2', '-3', '-', '4', 'N/A', '5'] def is_int(val): try: x = int(val) return True except ValueError: return False ivals = list(filter(is_int, values)) print(ivals) mylist = [1,4,-5,10,-7,2,3,-1] import math print [math.sqrt(n) for n in mylist if n > 0] clip_neg = [n if n > 0 else 0 for n in mylist] print clip_neg clip_pos = [n if n < 0 else 0 for n in mylist] print clip_pos addresses = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] counts = [0, 3, 10, 4, 1, 7, 6, 1] from itertools import compress more5 = [n > 5 for n in counts] print more5 print list(compress(addresses, more5))
1. 17 from the dictionary Extract the subset
prices = {'ACNE':45.23, 'AAPL':612.78, 'IBM':205.55, 'HPQ':37.20, 'FB':10.75} p1 = { key:value for key, value in prices.items() if value > 200 } print p1 tech_names = {'AAPL', 'IBM', 'HPQ'} p2 = { key:value for key, value in prices.items() if key in tech_names } print p2 p3 = dict( (key, value) for key, value in prices.items() if value > 200 ) #慢 print p3 tech_names = {'AAPL', 'IBM', 'HPQ'} p4 = { key:prices[key] for key in prices.keys() if key in tech_names } #慢 print p4
1.18 Map the name to the elements of the sequence
from collections import namedtuple Subscriber = namedtuple('Subscriber', ['addr', 'joined']) sub = Subscriber('wang@qq.com', '2020-10-10') print sub print sub.joined print sub.addr print len(sub) addr, joined = sub print addr print joined def compute_cost(records): total = 0.0 for rec in records: total += rec[1]*rec[2] return total Stock = namedtuple('Stock', ['name', 'shares', 'price']) def compute_cost2(records): total = 0.0 for rec in records: s = Stock(*rec) total += s.shares * s.price return total s = Stock('ACME', 100, 123.45) print s #s.shares = 75 #error s = s._replace(shares=75) print s Stock = namedtuple('Stock', ['name', 'shares', 'price', 'date', 'time']) stock_prototype = Stock('',0, 0.0, None, None) def dict_to_stock(s): return stock_prototype._replace(**s) a = {'name':'ACME', 'shares':100, 'price':123.45} print dict_to_stock(a) b = {'name':'ACME', 'shares':100, 'price':123.45, 'date':'12/12/2012'} print dict_to_stock(b)
1.19 Convert and convert the data at the same time
nums = [1, 2, 3, 4, 5] s = sum( x*x for x in nums ) print s import os files = os.listdir('dirname') if any(name.endswith('.py') for name in files): print('There be Python!') else: print('sorry, no Python!') s = ('ACME', 50, 123.45) print(','.join(str(x) for x in s)) portfolio = [ {'name':'GOOG', 'shares':50}, {'name':'YHOO', 'shares':75}, {'name':'AOL', 'shares':20}, {'name':'SCOX', 'shares':65} ] min_shares = min(s['shares'] for s in portfolio) print min_shares min_shares = min(portfolio, key=lambda s: s['shares']) print min_shares 1.20 将多个映射合并为单个映射 Java代码 a = {'x':1, 'z':3} b = {'y':2, 'z':4} #from collections import ChainMap from pip._vendor.distlib.compat import ChainMap c = ChainMap(a, b) print(c['x']) print(c['y']) print(c['z']) #from a 第一个映射中的值 print len(c) print list(c.values()) c['z'] = 10 c['w'] = 40 del c['x'] print a #del c['y'] #error 修改映射的操作总是会作用在列表的第一个映射结构上 values = ChainMap() values['x'] = 1 values = values.new_child()#add a new map values['x'] = 2 values = values.new_child() values['x'] = 3 #print values print values['x'] values = values.parents print values['x'] values = values.parents print values['x'] a = {'x':1, 'z':3} b = {'y':2, 'z':4} merged = dict(b) merged.update(a) print merged['x'] print merged['y'] print merged['z'] a['x'] = 13 print merged['x'] #不会反应到合并后的字典中 a = {'x':1, 'z':3} b = {'y':2, 'z':4} merged = ChainMap(a, b) print merged['x'] a['x'] = 42 print merged['x'] #会反应到合并后的字典中

Hot Article

Hot tools Tags

Hot Article

Hot Article Tags

Notepad++7.3.1
Easy-to-use and free code editor

SublimeText3 Chinese version
Chinese version, very easy to use

Zend Studio 13.0.1
Powerful PHP integrated development environment

Dreamweaver CS6
Visual web development tools

SublimeText3 Mac version
God-level code editing software (SublimeText3)

Hot Topics

What are the advantages and disadvantages of templating?

Google AI announces Gemini 1.5 Pro and Gemma 2 for developers

For only $250, Hugging Face's technical director teaches you how to fine-tune Llama 3 step by step

Share several .NET open source AI and LLM related project frameworks

A complete guide to golang function debugging and analysis
