PythonCookbook——数据结构和算法
Nov 26, 2016 am 10:13 AM第一章 数据结构和算法
1.1 将序列分解为单独的变量
p = (4, 5) x, y = p print x print y data = [ 'ACME', 50, 91.1, (2012, 12, 21) ] name, shares, price, date = data print name print shares print price print date name, shares, price, (year, mon, day ) = data print year p = (4, 5) #x, y, z = p 错误!!! s = 'hello!' a, b, c, d, e, f = s print a print f data = [ 'ACME', 50, 91.1, (2012, 12, 21) ] _, shares, price, _ = data print shares print price #其他数据可以丢弃了
1.2 从任意长度的可迭代对象中分解元素
from audioop import avg def drop_first_last(grades): first, *middle, last = grades return avg(middle) record = ('Dave', 'dave@example.com', '777-333-2323', '234-234-2345') name, email, *phone_numbers = record print name print email print phone_numbers *trailing, current = [10, 8, 7, 2, 5] print trailing #[10, 8, 7, 2, ] print current #5 records = [ ('foo', 1, 2), ('bar', 'hello'), ('foo', 5, 3) ] def do_foo(x, y): print ('foo', x, y) def do_bar(s): print ('bar', s) for tag, *args in records: if tag == 'foo': do_foo(*args) elif tag == 'bar': do_bar(*args) line = 'asdf:fedfr234://wef:678d:asdf' uname, *fields, homedir, sh = line.split(':') print uname print homedir record = ('ACME', 50, 123.45, (12, 18, 2012)) name, *_, (*_, year) = record print name print year items = [1, 10, 7, 4, 5, 9] head, *tail = items print head #1 print tail #[10, 7, 4, 5, 9] def sum(items): head, *tail = items return head + sum(tail) if tail else head sum(items)
1.3 保存最后N个元素
from _collections import deque def search(lines, pattern, history=5): previous_lines = deque(maxlen = history) for line in lines: if pattern in line: yield line, previous_lines previous_lines.append(line) # Example use on a file if __name__ == '__main__': with open('somefile.txt') as f: for line, prevlines in search(f, 'python', 5): for pline in prevlines: print (pline) #print (pline, end='') print (line) #print (pline, end='') print ('-'*20) q = deque(maxlen=3) q.append(1) q.append(2) q.append(3) print q q.append(4) print q q = deque() q.append(1) q.append(2) q.append(3) print q q.appendleft(4) print q q_pop = q.pop() print q_pop print q q_popleft = q.popleft() print q_popleft print q
1.4 找到最大或最小的N个元素
import heapq nums = [1,30,6,2,36,33,46,3,23,43] print (heapq.nlargest(3, nums)) print (heapq.nsmallest(3, nums)) portfolio = [ {'name':'IBM', 'shares':100, 'price':2.4}, {'name':'A', 'shares':1040, 'price':12.4}, {'name':'S', 'shares':40, 'price':23.4}, {'name':'D', 'shares':1, 'price':2.49}, {'name':'F', 'shares':9, 'price':24} ] cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price']) expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price']) print cheap print expensive nums = [1,8,2,23,7,-4,18,23,42,37,2] heap = list(nums) print heap heapq.heapify(heap) print heap print heapq.heappop(heap) print heapq.heappop(heap) print heapq.heappop(heap)
1.5 实现优先级队列
import heapq class PriorityQueue: def __init__(self): self._queue = [] self._index = 0 def push(self, item, priority): heapq.heappush(self._queue, (-priority, self._index, item)) self._index += 1 def pop(self): return heapq.heappop(self._queue)[-1] #Example class Item: def __init__(self, name): self.name = name def __repr__(self): return 'Item({!r})'.format(self.name) q = PriorityQueue() q.push(Item('foo'), 1) q.push(Item('spam'), 4) q.push(Item('bar'), 5) q.push(Item('grok'), 1) print q.pop() print q.pop() print q.pop() a = Item('foo') b = Item('bar') #a < b error a = (1, Item('foo')) b = (5, Item('bar')) print a < b c = (1, Item('grok')) #a < c error a = (1, 0, Item('foo')) b = (5, 1, Item('bar')) c = (1, 2, Item('grok')) print a < b print a < c
1.6 在字典中将建映射到多个值上
d = { 'a' : [1, 2, 3], 'b' : [4, 5] } e = { 'a' : {1, 2, 3}, 'b' : {4, 5} } from collections import defaultdict d = defaultdict(list) d['a'].append(1) d['a'].append(2) d['a'].append(3) print d d = defaultdict(set) d['a'].add(1) d['a'].add(2) d['a'].add(3) print d d = {} d.setdefault('a', []).append(1) d.setdefault('a', []).append(2) d.setdefault('b', []).append(3) print d d = {} for key, value in d:#pairs: if key not in d: d[key] = [] d[key].append(value) d = defaultdict(list) for key, value in d:#pairs: d[key].append(value)
1.7 让字典保持有序
from collections import OrderedDict d = OrderedDict() d['foo'] = 1 d['bar'] = 2 d['spam'] = 3 d['grol'] = 4 for key in d: print (key, d[key]) import json json.dumps(d)
1.8 与字典有关的计算问题
price = { 'ACME':23.45, 'IBM':25.45, 'FB':13.45, 'IO':4.45, 'JAVA':45.45, 'AV':38.38, } min_price = min( zip( price.values(), price.keys() ) ) print min_price max_price = max( zip( price.values(), price.keys() ) ) print max_price price_sorted = sorted( zip( price.values(), price.keys() ) ) print price_sorted price_and_names = zip( price.values(), price.keys() ) print (min(price_and_names)) #print (max(price_and_names)) error zip()创建了迭代器,内容只能被消费一次 print min(price) print max(price) print min(price.values()) print max(price.values()) print min(price, key = lambda k : price[k]) print max(price, key = lambda k : price[k]) min_value = price[ min(price, key = lambda k : price[k]) ] print min_value price = { 'AAA': 23, 'ZZZ': 23, } print min( zip( price.values(), price.keys() ) ) print max( zip( price.values(), price.keys() ) )
1.9 在两个字典中寻找相同点
a = { 'x':1, 'y':2, 'z':3 } b = { 'x':11, 'y':2, 'w':10 } print a.keys() & b.keys() #{'x','y'} print a.keys() - b.keys() #{'z'} print a.items() & b.items() #{('y', 2)} c = {key: a[key] for key in a.keys() - {'z', 'w'} } print c #{'x':1, 'y':2}
1.10 从序列中移除重复项且保持元素间顺序不变
def dedupe(items): seen = set() for item in items: if item not in seen: yield item seen.add(item) #example a = [1,5,2,1,9,1,5,10] print list(dedupe(a)) def dedupe2(items, key = None): seen = set() for item in items: val = item if key is None else key(item) if val not in seen: yield item seen.add(val) #example a = [ {'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}, ] print list( dedupe2(a, key=lambda d : (d['x'], d['y']) ) ) print list( dedupe2(a, key=lambda d : (d['x']) ) ) a = [1,5,2,1,9,1,5,10] print set(a)
1.11 对切片命名
items = [0,1,2,3,4,5,6] a = slice(2,4) print items[2:4] print items[a] items[a] = [10,11] print items print a.start print a.stop print a.step
1.12 找出序列中出现次数最多的元素
words = [ 'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes', 'the', 'look' ] from collections import Counter word_counts = Counter(words) top_three = word_counts.most_common(3) print top_three print word_counts['look'] print word_counts['the'] morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes'] for word in morewords: word_counts[word] += 1 print word_counts['eyes'] print word_counts['why'] word_counts.update(morewords) print word_counts['eyes'] print word_counts['why'] a = Counter(words) b = Counter(morewords) print a print b c = a + b print c d = a - b print b
1.13 通过公共键对字典列表排序
rows = [ {'fname':'Brian', 'lname':'Jones', 'uid':1003}, {'fname':'David', 'lname':'Beazley', 'uid':1002}, {'fname':'John', 'lname':'Cleese', 'uid':1001}, {'fname':'Big', 'lname':'Jones', 'uid':1004} ] from operator import itemgetter rows_by_fname = sorted(rows, key=itemgetter('fname')) rows_by_uid = sorted(rows, key=itemgetter('uid')) print rows_by_fname print rows_by_uid rows_by_lfname = sorted(rows, key=itemgetter('lname', 'fname')) print rows_by_lfname rows_by_fname = sorted(rows, key=lambda r: r['fname']) rows_by_lfname = sorted(rows, key=lambda r: (r['fname'], r['lname'])) print rows_by_fname print rows_by_lfname print min(rows, key=itemgetter('uid')) print max(rows, key=itemgetter('uid'))
1.14 对不原生支持比较操作的对象排序
class User: def __init__(self, user_id): self.user_id = user_id def __repr__(self): return 'User({})'.format(self.user_id) users = [User(23), User(3), User(99)] print users print sorted(users, key = lambda u: u.user_id) from operator import attrgetter print sorted(users, key=attrgetter('user_id')) print min(users, key=attrgetter('user_id')) print max(users, key=attrgetter('user_id'))
1.15 根据字段将记录分组
rows = [ {'address':'5412 N CLARK', 'data':'07/01/2012'}, {'address':'5232 N CLARK', 'data':'07/04/2012'}, {'address':'5542 E 58ARK', 'data':'07/02/2012'}, {'address':'5152 N CLARK', 'data':'07/03/2012'}, {'address':'7412 N CLARK', 'data':'07/02/2012'}, {'address':'6789 w CLARK', 'data':'07/03/2012'}, {'address':'9008 N CLARK', 'data':'07/01/2012'}, {'address':'2227 W CLARK', 'data':'07/04/2012'} ] from operator import itemgetter from itertools import groupby rows.sort(key=itemgetter('data')) for data, items in groupby(rows, key=itemgetter('data')): print (data) for i in items: print (' ', i) from collections import defaultdict rows_by_date = defaultdict(list) for row in rows: rows_by_date[row['data']].append(row) for r in rows_by_date['07/04/2012']: print(r)
1.16 筛选序列中的元素
mylist = [1,4,-5,10,-7,2,3,-1] print [n for n in mylist if n > 0]#列表推导式 print [n for n in mylist if n < 0] pos = (n for n in mylist if n > 0)#生成器表达式 print pos for x in pos: print(x) values = ['1', '2', '-3', '-', '4', 'N/A', '5'] def is_int(val): try: x = int(val) return True except ValueError: return False ivals = list(filter(is_int, values)) print(ivals) mylist = [1,4,-5,10,-7,2,3,-1] import math print [math.sqrt(n) for n in mylist if n > 0] clip_neg = [n if n > 0 else 0 for n in mylist] print clip_neg clip_pos = [n if n < 0 else 0 for n in mylist] print clip_pos addresses = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] counts = [0, 3, 10, 4, 1, 7, 6, 1] from itertools import compress more5 = [n > 5 for n in counts] print more5 print list(compress(addresses, more5))
1.17 从字典中提取子集
prices = {'ACNE':45.23, 'AAPL':612.78, 'IBM':205.55, 'HPQ':37.20, 'FB':10.75} p1 = { key:value for key, value in prices.items() if value > 200 } print p1 tech_names = {'AAPL', 'IBM', 'HPQ'} p2 = { key:value for key, value in prices.items() if key in tech_names } print p2 p3 = dict( (key, value) for key, value in prices.items() if value > 200 ) #慢 print p3 tech_names = {'AAPL', 'IBM', 'HPQ'} p4 = { key:prices[key] for key in prices.keys() if key in tech_names } #慢 print p4
1.18 将名称映射到序列的元素中
from collections import namedtuple Subscriber = namedtuple('Subscriber', ['addr', 'joined']) sub = Subscriber('wang@qq.com', '2020-10-10') print sub print sub.joined print sub.addr print len(sub) addr, joined = sub print addr print joined def compute_cost(records): total = 0.0 for rec in records: total += rec[1]*rec[2] return total Stock = namedtuple('Stock', ['name', 'shares', 'price']) def compute_cost2(records): total = 0.0 for rec in records: s = Stock(*rec) total += s.shares * s.price return total s = Stock('ACME', 100, 123.45) print s #s.shares = 75 #error s = s._replace(shares=75) print s Stock = namedtuple('Stock', ['name', 'shares', 'price', 'date', 'time']) stock_prototype = Stock('',0, 0.0, None, None) def dict_to_stock(s): return stock_prototype._replace(**s) a = {'name':'ACME', 'shares':100, 'price':123.45} print dict_to_stock(a) b = {'name':'ACME', 'shares':100, 'price':123.45, 'date':'12/12/2012'} print dict_to_stock(b)
1.19 同时对数据做转换和换算
nums = [1, 2, 3, 4, 5] s = sum( x*x for x in nums ) print s import os files = os.listdir('dirname') if any(name.endswith('.py') for name in files): print('There be Python!') else: print('sorry, no Python!') s = ('ACME', 50, 123.45) print(','.join(str(x) for x in s)) portfolio = [ {'name':'GOOG', 'shares':50}, {'name':'YHOO', 'shares':75}, {'name':'AOL', 'shares':20}, {'name':'SCOX', 'shares':65} ] min_shares = min(s['shares'] for s in portfolio) print min_shares min_shares = min(portfolio, key=lambda s: s['shares']) print min_shares 1.20 将多个映射合并为单个映射 Java代码 a = {'x':1, 'z':3} b = {'y':2, 'z':4} #from collections import ChainMap from pip._vendor.distlib.compat import ChainMap c = ChainMap(a, b) print(c['x']) print(c['y']) print(c['z']) #from a 第一个映射中的值 print len(c) print list(c.values()) c['z'] = 10 c['w'] = 40 del c['x'] print a #del c['y'] #error 修改映射的操作总是会作用在列表的第一个映射结构上 values = ChainMap() values['x'] = 1 values = values.new_child()#add a new map values['x'] = 2 values = values.new_child() values['x'] = 3 #print values print values['x'] values = values.parents print values['x'] values = values.parents print values['x'] a = {'x':1, 'z':3} b = {'y':2, 'z':4} merged = dict(b) merged.update(a) print merged['x'] print merged['y'] print merged['z'] a['x'] = 13 print merged['x'] #不会反应到合并后的字典中 a = {'x':1, 'z':3} b = {'y':2, 'z':4} merged = ChainMap(a, b) print merged['x'] a['x'] = 42 print merged['x'] #会反应到合并后的字典中

Article chaud

Outils chauds Tags

Article chaud

Tags d'article chaud

Bloc-notes++7.3.1
Éditeur de code facile à utiliser et gratuit

SublimeText3 version chinoise
Version chinoise, très simple à utiliser

Envoyer Studio 13.0.1
Puissant environnement de développement intégré PHP

Dreamweaver CS6
Outils de développement Web visuel

SublimeText3 version Mac
Logiciel d'édition de code au niveau de Dieu (SublimeText3)

Sujets chauds

Quels sont les avantages et les inconvénients des modèles ?

Google AI annonce Gemini 1.5 Pro et Gemma 2 pour les développeurs

Pour seulement 250$, le directeur technique de Hugging Face vous apprend étape par étape comment peaufiner Llama 3

Partagez plusieurs frameworks de projets open source .NET liés à l'IA et au LLM

Un guide complet sur le débogage et l'analyse des fonctions Golang

Comment enregistrer la fonction d'évaluation
