itertoolz.py 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057
  1. import itertools
  2. import heapq
  3. import collections
  4. import operator
  5. from functools import partial
  6. from itertools import filterfalse, zip_longest
  7. from collections.abc import Sequence
  8. from toolz.utils import no_default
# Names exported by ``from <this module> import *``.  Helpers defined further
# down in this file such as ``rest``, ``getter`` and ``no_pad`` are not listed.
__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',
           'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth',
           'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
           'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
           'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
           'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample')
  15. def remove(predicate, seq):
  16. """ Return those items of sequence for which predicate(item) is False
  17. >>> def iseven(x):
  18. ... return x % 2 == 0
  19. >>> list(remove(iseven, [1, 2, 3, 4]))
  20. [1, 3]
  21. """
  22. return filterfalse(predicate, seq)
  23. def accumulate(binop, seq, initial=no_default):
  24. """ Repeatedly apply binary function to a sequence, accumulating results
  25. >>> from operator import add, mul
  26. >>> list(accumulate(add, [1, 2, 3, 4, 5]))
  27. [1, 3, 6, 10, 15]
  28. >>> list(accumulate(mul, [1, 2, 3, 4, 5]))
  29. [1, 2, 6, 24, 120]
  30. Accumulate is similar to ``reduce`` and is good for making functions like
  31. cumulative sum:
  32. >>> from functools import partial, reduce
  33. >>> sum = partial(reduce, add)
  34. >>> cumsum = partial(accumulate, add)
  35. Accumulate also takes an optional argument that will be used as the first
  36. value. This is similar to reduce.
  37. >>> list(accumulate(add, [1, 2, 3], -1))
  38. [-1, 0, 2, 5]
  39. >>> list(accumulate(add, [], 1))
  40. [1]
  41. See Also:
  42. itertools.accumulate : In standard itertools for Python 3.2+
  43. """
  44. seq = iter(seq)
  45. if initial == no_default:
  46. try:
  47. result = next(seq)
  48. except StopIteration:
  49. return
  50. else:
  51. result = initial
  52. yield result
  53. for elem in seq:
  54. result = binop(result, elem)
  55. yield result
  56. def groupby(key, seq):
  57. """ Group a collection by a key function
  58. >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
  59. >>> groupby(len, names) # doctest: +SKIP
  60. {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}
  61. >>> iseven = lambda x: x % 2 == 0
  62. >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP
  63. {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}
  64. Non-callable keys imply grouping on a member.
  65. >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
  66. ... {'name': 'Bob', 'gender': 'M'},
  67. ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP
  68. {'F': [{'gender': 'F', 'name': 'Alice'}],
  69. 'M': [{'gender': 'M', 'name': 'Bob'},
  70. {'gender': 'M', 'name': 'Charlie'}]}
  71. Not to be confused with ``itertools.groupby``
  72. See Also:
  73. countby
  74. """
  75. if not callable(key):
  76. key = getter(key)
  77. d = collections.defaultdict(lambda: [].append)
  78. for item in seq:
  79. d[key(item)](item)
  80. rv = {}
  81. for k, v in d.items():
  82. rv[k] = v.__self__
  83. return rv
  84. def merge_sorted(*seqs, **kwargs):
  85. """ Merge and sort a collection of sorted collections
  86. This works lazily and only keeps one value from each iterable in memory.
  87. >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
  88. [1, 2, 3, 4, 5, 6]
  89. >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
  90. 'aaabbbccc'
  91. The "key" function used to sort the input may be passed as a keyword.
  92. >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
  93. [2, 1, 3, 3]
  94. """
  95. if len(seqs) == 0:
  96. return iter([])
  97. elif len(seqs) == 1:
  98. return iter(seqs[0])
  99. key = kwargs.get('key', None)
  100. if key is None:
  101. return _merge_sorted_binary(seqs)
  102. else:
  103. return _merge_sorted_binary_key(seqs, key)
def _merge_sorted_binary(seqs):
    """ Merge the sorted iterables in ``seqs`` by recursive halving

    ``seqs`` is split into two halves; a half holding a single iterable is
    used directly, otherwise it is merged recursively.  The two resulting
    streams are then merged with ``<``.  On ties the value from the first
    half is emitted first.
    """
    mid = len(seqs) // 2
    L1 = seqs[:mid]
    if len(L1) == 1:
        seq1 = iter(L1[0])
    else:
        seq1 = _merge_sorted_binary(L1)
    L2 = seqs[mid:]
    if len(L2) == 1:
        seq2 = iter(L2[0])
    else:
        seq2 = _merge_sorted_binary(L2)

    # Prime the right-hand stream; if it is empty the left one is the answer.
    try:
        val2 = next(seq2)
    except StopIteration:
        for val1 in seq1:
            yield val1
        return

    for val1 in seq1:
        if val2 < val1:
            yield val2
            # Keep draining the right stream while it stays below ``val1``.
            for val2 in seq2:
                if val2 < val1:
                    yield val2
                else:
                    # ``val2`` is now pending; emit ``val1`` and move on.
                    yield val1
                    break
            else:
                # Right stream exhausted with ``val1`` still unemitted.
                break
        else:
            yield val1
    else:
        # Left stream exhausted: ``val2`` is pending, then the rest of right.
        yield val2
        for val2 in seq2:
            yield val2
        return
    # Reached only via the ``break`` above: flush ``val1`` and the rest of left.
    yield val1
    for val1 in seq1:
        yield val1
def _merge_sorted_binary_key(seqs, key):
    """ Merge the sorted iterables in ``seqs`` by recursive halving, using ``key``

    Same scheme as ``_merge_sorted_binary`` but values are compared through
    ``key(value)``.  ``key`` is called once for each value pulled from either
    stream at this level.  On ties the value from the first half is emitted
    first.
    """
    mid = len(seqs) // 2
    L1 = seqs[:mid]
    if len(L1) == 1:
        seq1 = iter(L1[0])
    else:
        seq1 = _merge_sorted_binary_key(L1, key)
    L2 = seqs[mid:]
    if len(L2) == 1:
        seq2 = iter(L2[0])
    else:
        seq2 = _merge_sorted_binary_key(L2, key)

    # Prime the right-hand stream; if it is empty the left one is the answer.
    try:
        val2 = next(seq2)
    except StopIteration:
        for val1 in seq1:
            yield val1
        return
    key2 = key(val2)

    for val1 in seq1:
        key1 = key(val1)
        if key2 < key1:
            yield val2
            # Keep draining the right stream while its key stays below ``key1``.
            for val2 in seq2:
                key2 = key(val2)
                if key2 < key1:
                    yield val2
                else:
                    # ``val2`` is now pending; emit ``val1`` and move on.
                    yield val1
                    break
            else:
                # Right stream exhausted with ``val1`` still unemitted.
                break
        else:
            yield val1
    else:
        # Left stream exhausted: ``val2`` is pending, then the rest of right.
        yield val2
        for val2 in seq2:
            yield val2
        return
    # Reached only via the ``break`` above: flush ``val1`` and the rest of left.
    yield val1
    for val1 in seq1:
        yield val1
  185. def interleave(seqs):
  186. """ Interleave a sequence of sequences
  187. >>> list(interleave([[1, 2], [3, 4]]))
  188. [1, 3, 2, 4]
  189. >>> ''.join(interleave(('ABC', 'XY')))
  190. 'AXBYC'
  191. Both the individual sequences and the sequence of sequences may be infinite
  192. Returns a lazy iterator
  193. """
  194. iters = itertools.cycle(map(iter, seqs))
  195. while True:
  196. try:
  197. for itr in iters:
  198. yield next(itr)
  199. return
  200. except StopIteration:
  201. predicate = partial(operator.is_not, itr)
  202. iters = itertools.cycle(itertools.takewhile(predicate, iters))
  203. def unique(seq, key=None):
  204. """ Return only unique elements of a sequence
  205. >>> tuple(unique((1, 2, 3)))
  206. (1, 2, 3)
  207. >>> tuple(unique((1, 2, 1, 3)))
  208. (1, 2, 3)
  209. Uniqueness can be defined by key keyword
  210. >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))
  211. ('cat', 'mouse')
  212. """
  213. seen = set()
  214. seen_add = seen.add
  215. if key is None:
  216. for item in seq:
  217. if item not in seen:
  218. seen_add(item)
  219. yield item
  220. else: # calculate key
  221. for item in seq:
  222. val = key(item)
  223. if val not in seen:
  224. seen_add(val)
  225. yield item
  226. def isiterable(x):
  227. """ Is x iterable?
  228. >>> isiterable([1, 2, 3])
  229. True
  230. >>> isiterable('abc')
  231. True
  232. >>> isiterable(5)
  233. False
  234. """
  235. try:
  236. iter(x)
  237. return True
  238. except TypeError:
  239. return False
  240. def isdistinct(seq):
  241. """ All values in sequence are distinct
  242. >>> isdistinct([1, 2, 3])
  243. True
  244. >>> isdistinct([1, 2, 1])
  245. False
  246. >>> isdistinct("Hello")
  247. False
  248. >>> isdistinct("World")
  249. True
  250. """
  251. if iter(seq) is seq:
  252. seen = set()
  253. seen_add = seen.add
  254. for item in seq:
  255. if item in seen:
  256. return False
  257. seen_add(item)
  258. return True
  259. else:
  260. return len(seq) == len(set(seq))
  261. def take(n, seq):
  262. """ The first n elements of a sequence
  263. >>> list(take(2, [10, 20, 30, 40, 50]))
  264. [10, 20]
  265. See Also:
  266. drop
  267. tail
  268. """
  269. return itertools.islice(seq, n)
  270. def tail(n, seq):
  271. """ The last n elements of a sequence
  272. >>> tail(2, [10, 20, 30, 40, 50])
  273. [40, 50]
  274. See Also:
  275. drop
  276. take
  277. """
  278. try:
  279. return seq[-n:]
  280. except (TypeError, KeyError):
  281. return tuple(collections.deque(seq, n))
  282. def drop(n, seq):
  283. """ The sequence following the first n elements
  284. >>> list(drop(2, [10, 20, 30, 40, 50]))
  285. [30, 40, 50]
  286. See Also:
  287. take
  288. tail
  289. """
  290. return itertools.islice(seq, n, None)
  291. def take_nth(n, seq):
  292. """ Every nth item in seq
  293. >>> list(take_nth(2, [10, 20, 30, 40, 50]))
  294. [10, 30, 50]
  295. """
  296. return itertools.islice(seq, 0, None, n)
  297. def first(seq):
  298. """ The first element in a sequence
  299. >>> first('ABC')
  300. 'A'
  301. """
  302. return next(iter(seq))
  303. def second(seq):
  304. """ The second element in a sequence
  305. >>> second('ABC')
  306. 'B'
  307. """
  308. seq = iter(seq)
  309. next(seq)
  310. return next(seq)
  311. def nth(n, seq):
  312. """ The nth element in a sequence
  313. >>> nth(1, 'ABC')
  314. 'B'
  315. """
  316. if isinstance(seq, (tuple, list, Sequence)):
  317. return seq[n]
  318. else:
  319. return next(itertools.islice(seq, n, None))
  320. def last(seq):
  321. """ The last element in a sequence
  322. >>> last('ABC')
  323. 'C'
  324. """
  325. return tail(1, seq)[0]
# ``rest(seq)`` is ``drop(1, seq)``: a lazy iterator over everything after the
# first element.
rest = partial(drop, 1)
  327. def _get(ind, seq, default):
  328. try:
  329. return seq[ind]
  330. except (KeyError, IndexError):
  331. return default
  332. def get(ind, seq, default=no_default):
  333. """ Get element in a sequence or dict
  334. Provides standard indexing
  335. >>> get(1, 'ABC') # Same as 'ABC'[1]
  336. 'B'
  337. Pass a list to get multiple values
  338. >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2])
  339. ('B', 'C')
  340. Works on any value that supports indexing/getitem
  341. For example here we see that it works with dictionaries
  342. >>> phonebook = {'Alice': '555-1234',
  343. ... 'Bob': '555-5678',
  344. ... 'Charlie':'555-9999'}
  345. >>> get('Alice', phonebook)
  346. '555-1234'
  347. >>> get(['Alice', 'Bob'], phonebook)
  348. ('555-1234', '555-5678')
  349. Provide a default for missing values
  350. >>> get(['Alice', 'Dennis'], phonebook, None)
  351. ('555-1234', None)
  352. See Also:
  353. pluck
  354. """
  355. try:
  356. return seq[ind]
  357. except TypeError: # `ind` may be a list
  358. if isinstance(ind, list):
  359. if default == no_default:
  360. if len(ind) > 1:
  361. return operator.itemgetter(*ind)(seq)
  362. elif ind:
  363. return seq[ind[0]],
  364. else:
  365. return ()
  366. else:
  367. return tuple(_get(i, seq, default) for i in ind)
  368. elif default != no_default:
  369. return default
  370. else:
  371. raise
  372. except (KeyError, IndexError): # we know `ind` is not a list
  373. if default == no_default:
  374. raise
  375. else:
  376. return default
  377. def concat(seqs):
  378. """ Concatenate zero or more iterables, any of which may be infinite.
  379. An infinite sequence will prevent the rest of the arguments from
  380. being included.
  381. We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs
  382. can be a generator.
  383. >>> list(concat([[], [1], [2, 3]]))
  384. [1, 2, 3]
  385. See also:
  386. itertools.chain.from_iterable equivalent
  387. """
  388. return itertools.chain.from_iterable(seqs)
  389. def concatv(*seqs):
  390. """ Variadic version of concat
  391. >>> list(concatv([], ["a"], ["b", "c"]))
  392. ['a', 'b', 'c']
  393. See also:
  394. itertools.chain
  395. """
  396. return concat(seqs)
  397. def mapcat(func, seqs):
  398. """ Apply func to each sequence in seqs, concatenating results.
  399. >>> list(mapcat(lambda s: [c.upper() for c in s],
  400. ... [["a", "b"], ["c", "d", "e"]]))
  401. ['A', 'B', 'C', 'D', 'E']
  402. """
  403. return concat(map(func, seqs))
  404. def cons(el, seq):
  405. """ Add el to beginning of (possibly infinite) sequence seq.
  406. >>> list(cons(1, [2, 3]))
  407. [1, 2, 3]
  408. """
  409. return itertools.chain([el], seq)
  410. def interpose(el, seq):
  411. """ Introduce element between each pair of elements in seq
  412. >>> list(interpose("a", [1, 2, 3]))
  413. [1, 'a', 2, 'a', 3]
  414. """
  415. inposed = concat(zip(itertools.repeat(el), seq))
  416. next(inposed)
  417. return inposed
  418. def frequencies(seq):
  419. """ Find number of occurrences of each value in seq
  420. >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP
  421. {'cat': 3, 'ox': 1, 'pig': 2}
  422. See Also:
  423. countby
  424. groupby
  425. """
  426. d = collections.defaultdict(int)
  427. for item in seq:
  428. d[item] += 1
  429. return dict(d)
  430. def reduceby(key, binop, seq, init=no_default):
  431. """ Perform a simultaneous groupby and reduction
  432. The computation:
  433. >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP
  434. is equivalent to the following:
  435. >>> def reduction(group): # doctest: +SKIP
  436. ... return reduce(binop, group, init) # doctest: +SKIP
  437. >>> groups = groupby(key, seq) # doctest: +SKIP
  438. >>> result = valmap(reduction, groups) # doctest: +SKIP
  439. But the former does not build the intermediate groups, allowing it to
  440. operate in much less space. This makes it suitable for larger datasets
  441. that do not fit comfortably in memory
  442. The ``init`` keyword argument is the default initialization of the
  443. reduction. This can be either a constant value like ``0`` or a callable
  444. like ``lambda : 0`` as might be used in ``defaultdict``.
  445. Simple Examples
  446. ---------------
  447. >>> from operator import add, mul
  448. >>> iseven = lambda x: x % 2 == 0
  449. >>> data = [1, 2, 3, 4, 5]
  450. >>> reduceby(iseven, add, data) # doctest: +SKIP
  451. {False: 9, True: 6}
  452. >>> reduceby(iseven, mul, data) # doctest: +SKIP
  453. {False: 15, True: 8}
  454. Complex Example
  455. ---------------
  456. >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
  457. ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
  458. ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
  459. ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]
  460. >>> reduceby('state', # doctest: +SKIP
  461. ... lambda acc, x: acc + x['cost'],
  462. ... projects, 0)
  463. {'CA': 1200000, 'IL': 2100000}
  464. Example Using ``init``
  465. ----------------------
  466. >>> def set_add(s, i):
  467. ... s.add(i)
  468. ... return s
  469. >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP
  470. {True: set([2, 4]),
  471. False: set([1, 3])}
  472. """
  473. is_no_default = init == no_default
  474. if not is_no_default and not callable(init):
  475. _init = init
  476. init = lambda: _init
  477. if not callable(key):
  478. key = getter(key)
  479. d = {}
  480. for item in seq:
  481. k = key(item)
  482. if k not in d:
  483. if is_no_default:
  484. d[k] = item
  485. continue
  486. else:
  487. d[k] = init()
  488. d[k] = binop(d[k], item)
  489. return d
  490. def iterate(func, x):
  491. """ Repeatedly apply a function func onto an original input
  492. Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc..
  493. >>> def inc(x): return x + 1
  494. >>> counter = iterate(inc, 0)
  495. >>> next(counter)
  496. 0
  497. >>> next(counter)
  498. 1
  499. >>> next(counter)
  500. 2
  501. >>> double = lambda x: x * 2
  502. >>> powers_of_two = iterate(double, 1)
  503. >>> next(powers_of_two)
  504. 1
  505. >>> next(powers_of_two)
  506. 2
  507. >>> next(powers_of_two)
  508. 4
  509. >>> next(powers_of_two)
  510. 8
  511. """
  512. while True:
  513. yield x
  514. x = func(x)
  515. def sliding_window(n, seq):
  516. """ A sequence of overlapping subsequences
  517. >>> list(sliding_window(2, [1, 2, 3, 4]))
  518. [(1, 2), (2, 3), (3, 4)]
  519. This function creates a sliding window suitable for transformations like
  520. sliding means / smoothing
  521. >>> mean = lambda seq: float(sum(seq)) / len(seq)
  522. >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
  523. [1.5, 2.5, 3.5]
  524. """
  525. return zip(*(collections.deque(itertools.islice(it, i), 0) or it
  526. for i, it in enumerate(itertools.tee(seq, n))))
  527. no_pad = '__no__pad__'
  528. def partition(n, seq, pad=no_pad):
  529. """ Partition sequence into tuples of length n
  530. >>> list(partition(2, [1, 2, 3, 4]))
  531. [(1, 2), (3, 4)]
  532. If the length of ``seq`` is not evenly divisible by ``n``, the final tuple
  533. is dropped if ``pad`` is not specified, or filled to length ``n`` by pad:
  534. >>> list(partition(2, [1, 2, 3, 4, 5]))
  535. [(1, 2), (3, 4)]
  536. >>> list(partition(2, [1, 2, 3, 4, 5], pad=None))
  537. [(1, 2), (3, 4), (5, None)]
  538. See Also:
  539. partition_all
  540. """
  541. args = [iter(seq)] * n
  542. if pad is no_pad:
  543. return zip(*args)
  544. else:
  545. return zip_longest(*args, fillvalue=pad)
  546. def partition_all(n, seq):
  547. """ Partition all elements of sequence into tuples of length at most n
  548. The final tuple may be shorter to accommodate extra elements.
  549. >>> list(partition_all(2, [1, 2, 3, 4]))
  550. [(1, 2), (3, 4)]
  551. >>> list(partition_all(2, [1, 2, 3, 4, 5]))
  552. [(1, 2), (3, 4), (5,)]
  553. See Also:
  554. partition
  555. """
  556. args = [iter(seq)] * n
  557. it = zip_longest(*args, fillvalue=no_pad)
  558. try:
  559. prev = next(it)
  560. except StopIteration:
  561. return
  562. for item in it:
  563. yield prev
  564. prev = item
  565. if prev[-1] is no_pad:
  566. try:
  567. # If seq defines __len__, then
  568. # we can quickly calculate where no_pad starts
  569. yield prev[:len(seq) % n]
  570. except TypeError:
  571. # Get first index of no_pad without using .index()
  572. # https://github.com/pytoolz/toolz/issues/387
  573. # Binary search from CPython's bisect module,
  574. # modified for identity testing.
  575. lo, hi = 0, n
  576. while lo < hi:
  577. mid = (lo + hi) // 2
  578. if prev[mid] is no_pad:
  579. hi = mid
  580. else:
  581. lo = mid + 1
  582. yield prev[:lo]
  583. else:
  584. yield prev
  585. def count(seq):
  586. """ Count the number of items in seq
  587. Like the builtin ``len`` but works on lazy sequences.
  588. Not to be confused with ``itertools.count``
  589. See also:
  590. len
  591. """
  592. if hasattr(seq, '__len__'):
  593. return len(seq)
  594. return sum(1 for i in seq)
  595. def pluck(ind, seqs, default=no_default):
  596. """ plucks an element or several elements from each item in a sequence.
  597. ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
  598. elements of each item in the sequence.
  599. This is equivalent to running `map(curried.get(ind), seqs)`
  600. ``ind`` can be either a single string/index or a list of strings/indices.
  601. ``seqs`` should be sequence containing sequences or dicts.
  602. e.g.
  603. >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
  604. >>> list(pluck('name', data))
  605. ['Cheese', 'Pies']
  606. >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
  607. [(1, 2), (4, 5)]
  608. See Also:
  609. get
  610. map
  611. """
  612. if default == no_default:
  613. get = getter(ind)
  614. return map(get, seqs)
  615. elif isinstance(ind, list):
  616. return (tuple(_get(item, seq, default) for item in ind)
  617. for seq in seqs)
  618. return (_get(ind, seq, default) for seq in seqs)
  619. def getter(index):
  620. if isinstance(index, list):
  621. if len(index) == 1:
  622. index = index[0]
  623. return lambda x: (x[index],)
  624. elif index:
  625. return operator.itemgetter(*index)
  626. else:
  627. return lambda x: ()
  628. else:
  629. return operator.itemgetter(index)
  630. def join(leftkey, leftseq, rightkey, rightseq,
  631. left_default=no_default, right_default=no_default):
  632. """ Join two sequences on common attributes
  633. This is a semi-streaming operation. The LEFT sequence is fully evaluated
  634. and placed into memory. The RIGHT sequence is evaluated lazily and so can
  635. be arbitrarily large.
  636. (Note: If right_default is defined, then unique keys of rightseq
  637. will also be stored in memory.)
  638. >>> friends = [('Alice', 'Edith'),
  639. ... ('Alice', 'Zhao'),
  640. ... ('Edith', 'Alice'),
  641. ... ('Zhao', 'Alice'),
  642. ... ('Zhao', 'Edith')]
  643. >>> cities = [('Alice', 'NYC'),
  644. ... ('Alice', 'Chicago'),
  645. ... ('Dan', 'Syndey'),
  646. ... ('Edith', 'Paris'),
  647. ... ('Edith', 'Berlin'),
  648. ... ('Zhao', 'Shanghai')]
  649. >>> # Vacation opportunities
  650. >>> # In what cities do people have friends?
  651. >>> result = join(second, friends,
  652. ... first, cities)
  653. >>> for ((a, b), (c, d)) in sorted(unique(result)):
  654. ... print((a, d))
  655. ('Alice', 'Berlin')
  656. ('Alice', 'Paris')
  657. ('Alice', 'Shanghai')
  658. ('Edith', 'Chicago')
  659. ('Edith', 'NYC')
  660. ('Zhao', 'Chicago')
  661. ('Zhao', 'NYC')
  662. ('Zhao', 'Berlin')
  663. ('Zhao', 'Paris')
  664. Specify outer joins with keyword arguments ``left_default`` and/or
  665. ``right_default``. Here is a full outer join in which unmatched elements
  666. are paired with None.
  667. >>> identity = lambda x: x
  668. >>> list(join(identity, [1, 2, 3],
  669. ... identity, [2, 3, 4],
  670. ... left_default=None, right_default=None))
  671. [(2, 2), (3, 3), (None, 4), (1, None)]
  672. Usually the key arguments are callables to be applied to the sequences. If
  673. the keys are not obviously callable then it is assumed that indexing was
  674. intended, e.g. the following is a legal change.
  675. The join is implemented as a hash join and the keys of leftseq must be
  676. hashable. Additionally, if right_default is defined, then keys of rightseq
  677. must also be hashable.
  678. >>> # result = join(second, friends, first, cities)
  679. >>> result = join(1, friends, 0, cities) # doctest: +SKIP
  680. """
  681. if not callable(leftkey):
  682. leftkey = getter(leftkey)
  683. if not callable(rightkey):
  684. rightkey = getter(rightkey)
  685. d = groupby(leftkey, leftseq)
  686. if left_default == no_default and right_default == no_default:
  687. # Inner Join
  688. for item in rightseq:
  689. key = rightkey(item)
  690. if key in d:
  691. for left_match in d[key]:
  692. yield (left_match, item)
  693. elif left_default != no_default and right_default == no_default:
  694. # Right Join
  695. for item in rightseq:
  696. key = rightkey(item)
  697. if key in d:
  698. for left_match in d[key]:
  699. yield (left_match, item)
  700. else:
  701. yield (left_default, item)
  702. elif right_default != no_default:
  703. seen_keys = set()
  704. seen = seen_keys.add
  705. if left_default == no_default:
  706. # Left Join
  707. for item in rightseq:
  708. key = rightkey(item)
  709. seen(key)
  710. if key in d:
  711. for left_match in d[key]:
  712. yield (left_match, item)
  713. else:
  714. # Full Join
  715. for item in rightseq:
  716. key = rightkey(item)
  717. seen(key)
  718. if key in d:
  719. for left_match in d[key]:
  720. yield (left_match, item)
  721. else:
  722. yield (left_default, item)
  723. for key, matches in d.items():
  724. if key not in seen_keys:
  725. for match in matches:
  726. yield (match, right_default)
  727. def diff(*seqs, **kwargs):
  728. """ Return those items that differ between sequences
  729. >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
  730. [(3, 10)]
  731. Shorter sequences may be padded with a ``default`` value:
  732. >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
  733. [(3, 10), (None, 100)]
  734. A ``key`` function may also be applied to each item to use during
  735. comparisons:
  736. >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
  737. [('bananas', 'Oranges')]
  738. """
  739. N = len(seqs)
  740. if N == 1 and isinstance(seqs[0], list):
  741. seqs = seqs[0]
  742. N = len(seqs)
  743. if N < 2:
  744. raise TypeError('Too few sequences given (min 2 required)')
  745. default = kwargs.get('default', no_default)
  746. if default == no_default:
  747. iters = zip(*seqs)
  748. else:
  749. iters = zip_longest(*seqs, fillvalue=default)
  750. key = kwargs.get('key', None)
  751. if key is None:
  752. for items in iters:
  753. if items.count(items[0]) != N:
  754. yield items
  755. else:
  756. for items in iters:
  757. vals = tuple(map(key, items))
  758. if vals.count(vals[0]) != N:
  759. yield items
  760. def topk(k, seq, key=None):
  761. """ Find the k largest elements of a sequence
  762. Operates lazily in ``n*log(k)`` time
  763. >>> topk(2, [1, 100, 10, 1000])
  764. (1000, 100)
  765. Use a key function to change sorted order
  766. >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
  767. ('Charlie', 'Alice')
  768. See also:
  769. heapq.nlargest
  770. """
  771. if key is not None and not callable(key):
  772. key = getter(key)
  773. return tuple(heapq.nlargest(k, seq, key=key))
  774. def peek(seq):
  775. """ Retrieve the next element of a sequence
  776. Returns the first element and an iterable equivalent to the original
  777. sequence, still having the element retrieved.
  778. >>> seq = [0, 1, 2, 3, 4]
  779. >>> first, seq = peek(seq)
  780. >>> first
  781. 0
  782. >>> list(seq)
  783. [0, 1, 2, 3, 4]
  784. """
  785. iterator = iter(seq)
  786. item = next(iterator)
  787. return item, itertools.chain((item,), iterator)
  788. def peekn(n, seq):
  789. """ Retrieve the next n elements of a sequence
  790. Returns a tuple of the first n elements and an iterable equivalent
  791. to the original, still having the elements retrieved.
  792. >>> seq = [0, 1, 2, 3, 4]
  793. >>> first_two, seq = peekn(2, seq)
  794. >>> first_two
  795. (0, 1)
  796. >>> list(seq)
  797. [0, 1, 2, 3, 4]
  798. """
  799. iterator = iter(seq)
  800. peeked = tuple(take(n, iterator))
  801. return peeked, itertools.chain(iter(peeked), iterator)
  802. def random_sample(prob, seq, random_state=None):
  803. """ Return elements from a sequence with probability of prob
  804. Returns a lazy iterator of random items from seq.
  805. ``random_sample`` considers each item independently and without
  806. replacement. See below how the first time it returned 13 items and the
  807. next time it returned 6 items.
  808. >>> seq = list(range(100))
  809. >>> list(random_sample(0.1, seq)) # doctest: +SKIP
  810. [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]
  811. >>> list(random_sample(0.1, seq)) # doctest: +SKIP
  812. [6, 44, 54, 61, 69, 94]
  813. Providing an integer seed for ``random_state`` will result in
  814. deterministic sampling. Given the same seed it will return the same sample
  815. every time.
  816. >>> list(random_sample(0.1, seq, random_state=2016))
  817. [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
  818. >>> list(random_sample(0.1, seq, random_state=2016))
  819. [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
  820. ``random_state`` can also be any object with a method ``random`` that
  821. returns floats between 0.0 and 1.0 (exclusive).
  822. >>> from random import Random
  823. >>> randobj = Random(2016)
  824. >>> list(random_sample(0.1, seq, random_state=randobj))
  825. [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
  826. """
  827. if not hasattr(random_state, 'random'):
  828. from random import Random
  829. random_state = Random(random_state)
  830. return filter(lambda _: random_state.random() < prob, seq)