tableutils.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2013, Mahmoud Hashemi
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are
  6. # met:
  7. #
  8. # * Redistributions of source code must retain the above copyright
  9. # notice, this list of conditions and the following disclaimer.
  10. #
  11. # * Redistributions in binary form must reproduce the above
  12. # copyright notice, this list of conditions and the following
  13. # disclaimer in the documentation and/or other materials provided
  14. # with the distribution.
  15. #
  16. # * The names of the contributors may not be used to endorse or
  17. # promote products derived from this software without specific
  18. # prior written permission.
  19. #
  20. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. """If there is one recurring theme in ``boltons``, it is that Python
  32. has excellent datastructures that constitute a good foundation for
  33. most quick manipulations, as well as building applications. However,
  34. Python usage has grown much faster than builtin data structure
  35. power. Python has a growing need for more advanced general-purpose
  36. data structures which behave intuitively.
  37. The :class:`Table` class is one example. When handed one- or
  38. two-dimensional data, it can provide useful, if basic, text and HTML
  39. renditions of small to medium sized data. It also heuristically
  40. handles recursive data of various formats (lists, dicts, namedtuples,
  41. objects).
  42. For more advanced :class:`Table`-style manipulation check out the
  43. `pandas`_ DataFrame.
  44. .. _pandas: http://pandas.pydata.org/
  45. """
  46. from __future__ import print_function
  47. try:
  48. from html import escape as html_escape
  49. except ImportError:
  50. from cgi import escape as html_escape
  51. import types
  52. from itertools import islice
  53. try:
  54. from collections.abc import Sequence, Mapping, MutableSequence
  55. except ImportError:
  56. from collections import Sequence, Mapping, MutableSequence
  57. try:
  58. string_types, integer_types = (str, unicode), (int, long)
  59. from cgi import escape as html_escape
  60. except NameError:
  61. # Python 3 compat
  62. unicode = str
  63. string_types, integer_types = (str, bytes), (int,)
  64. from html import escape as html_escape
  65. try:
  66. from .typeutils import make_sentinel
  67. _MISSING = make_sentinel(var_name='_MISSING')
  68. except ImportError:
  69. _MISSING = object()
  70. """
  71. Some idle feature thoughts:
  72. * shift around column order without rearranging data
  73. * gotta make it so you can add additional items, not just initialize with
  74. * maybe a shortcut would be to allow adding of Tables to other Tables
  75. * what's the perf of preallocating lists and overwriting items versus
  76. starting from empty?
  77. * is it possible to effectively tell the difference between when a
  78. Table is from_data()'d with a single row (list) or with a list of lists?
  79. * CSS: white-space pre-line or pre-wrap maybe?
  80. * Would be nice to support different backends (currently uses lists
  81. exclusively). Sometimes large datasets come in list-of-dicts and
  82. list-of-tuples format and it's desirable to cut down processing overhead.
  83. TODO: make iterable on rows?
  84. """
# Public API: ``Table`` is the only name exported by a star-import of
# this module.
__all__ = ['Table']
  86. def to_text(obj, maxlen=None):
  87. try:
  88. text = unicode(obj)
  89. except Exception:
  90. try:
  91. text = unicode(repr(obj))
  92. except Exception:
  93. text = unicode(object.__repr__(obj))
  94. if maxlen and len(text) > maxlen:
  95. text = text[:maxlen - 3] + '...'
  96. # TODO: inverse of ljust/rjust/center
  97. return text
  98. def escape_html(obj, maxlen=None):
  99. text = to_text(obj, maxlen=maxlen)
  100. return html_escape(text, quote=True)
  101. _DNR = set((type(None), bool, complex, float,
  102. type(NotImplemented), slice,
  103. types.FunctionType, types.MethodType, types.BuiltinFunctionType,
  104. types.GeneratorType) + string_types + integer_types)
  105. class UnsupportedData(TypeError):
  106. pass
  107. class InputType(object):
  108. def __init__(self, *a, **kw):
  109. pass
  110. def get_entry_seq(self, data_seq, headers):
  111. return [self.get_entry(entry, headers) for entry in data_seq]
  112. class DictInputType(InputType):
  113. def check_type(self, obj):
  114. return isinstance(obj, Mapping)
  115. def guess_headers(self, obj):
  116. return sorted(obj.keys())
  117. def get_entry(self, obj, headers):
  118. return [obj.get(h) for h in headers]
  119. def get_entry_seq(self, obj, headers):
  120. return [[ci.get(h) for h in headers] for ci in obj]
  121. class ObjectInputType(InputType):
  122. def check_type(self, obj):
  123. return type(obj) not in _DNR and hasattr(obj, '__class__')
  124. def guess_headers(self, obj):
  125. headers = []
  126. for attr in dir(obj):
  127. # an object's __dict__ could technically have non-string keys
  128. try:
  129. val = getattr(obj, attr)
  130. except Exception:
  131. # seen on greenlet: `run` shows in dir() but raises
  132. # AttributeError. Also properties misbehave.
  133. continue
  134. if callable(val):
  135. continue
  136. headers.append(attr)
  137. return headers
  138. def get_entry(self, obj, headers):
  139. values = []
  140. for h in headers:
  141. try:
  142. values.append(getattr(obj, h))
  143. except Exception:
  144. values.append(None)
  145. return values
  146. # might be better to hardcode list support since it's so close to the
  147. # core or might be better to make this the copy-style from_* importer
  148. # and have the non-copy style be hardcoded in __init__
  149. class ListInputType(InputType):
  150. def check_type(self, obj):
  151. return isinstance(obj, MutableSequence)
  152. def guess_headers(self, obj):
  153. return None
  154. def get_entry(self, obj, headers):
  155. return obj
  156. def get_entry_seq(self, obj_seq, headers):
  157. return obj_seq
  158. class TupleInputType(InputType):
  159. def check_type(self, obj):
  160. return isinstance(obj, tuple)
  161. def guess_headers(self, obj):
  162. return None
  163. def get_entry(self, obj, headers):
  164. return list(obj)
  165. def get_entry_seq(self, obj_seq, headers):
  166. return [list(t) for t in obj_seq]
  167. class NamedTupleInputType(InputType):
  168. def check_type(self, obj):
  169. return hasattr(obj, '_fields') and isinstance(obj, tuple)
  170. def guess_headers(self, obj):
  171. return list(obj._fields)
  172. def get_entry(self, obj, headers):
  173. return [getattr(obj, h, None) for h in headers]
  174. def get_entry_seq(self, obj_seq, headers):
  175. return [[getattr(obj, h, None) for h in headers] for obj in obj_seq]
  176. class Table(object):
  177. """
  178. This Table class is meant to be simple, low-overhead, and extensible. Its
  179. most common use would be for translation between in-memory data
  180. structures and serialization formats, such as HTML and console-ready text.
  181. As such, it stores data in list-of-lists format, and *does not* copy
  182. lists passed in. It also reserves the right to modify those lists in a
  183. "filling" process, whereby short lists are extended to the width of
  184. the table (usually determined by number of headers). This greatly
  185. reduces overhead and processing/validation that would have to occur
  186. otherwise.
  187. General description of headers behavior:
  188. Headers describe the columns, but are not part of the data, however,
  189. if the *headers* argument is omitted, Table tries to infer header
  190. names from the data. It is possible to have a table with no headers,
  191. just pass in ``headers=None``.
  192. Supported inputs:
  193. * :class:`list` of :class:`list` objects
  194. * :class:`dict` (list/single)
  195. * :class:`object` (list/single)
  196. * :class:`collections.namedtuple` (list/single)
  197. * TODO: DB API cursor?
  198. * TODO: json
  199. Supported outputs:
  200. * HTML
  201. * Pretty text (also usable as GF Markdown)
  202. * TODO: CSV
  203. * TODO: json
  204. * TODO: json lines
  205. To minimize resident size, the Table data is stored as a list of lists.
  206. """
  207. # order definitely matters here
  208. _input_types = [DictInputType(), ListInputType(),
  209. NamedTupleInputType(), TupleInputType(),
  210. ObjectInputType()]
  211. _html_tr, _html_tr_close = '<tr>', '</tr>'
  212. _html_th, _html_th_close = '<th>', '</th>'
  213. _html_td, _html_td_close = '<td>', '</td>'
  214. _html_thead, _html_thead_close = '<thead>', '</thead>'
  215. _html_tbody, _html_tbody_close = '<tbody>', '</tbody>'
  216. # _html_tfoot, _html_tfoot_close = '<tfoot>', '</tfoot>'
  217. _html_table_tag, _html_table_tag_close = '<table>', '</table>'
  218. def __init__(self, data=None, headers=_MISSING, metadata=None):
  219. if headers is _MISSING:
  220. headers = []
  221. if data:
  222. headers, data = list(data[0]), islice(data, 1, None)
  223. self.headers = headers or []
  224. self.metadata = metadata or {}
  225. self._data = []
  226. self._width = 0
  227. self.extend(data)
  228. def extend(self, data):
  229. """
  230. Append the given data to the end of the Table.
  231. """
  232. if not data:
  233. return
  234. self._data.extend(data)
  235. self._set_width()
  236. self._fill()
  237. def _set_width(self, reset=False):
  238. if reset:
  239. self._width = 0
  240. if self._width:
  241. return
  242. if self.headers:
  243. self._width = len(self.headers)
  244. return
  245. self._width = max([len(d) for d in self._data])
  246. def _fill(self):
  247. width, filler = self._width, [None]
  248. if not width:
  249. return
  250. for d in self._data:
  251. rem = width - len(d)
  252. if rem > 0:
  253. d.extend(filler * rem)
  254. return
  255. @classmethod
  256. def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None):
  257. """Create a Table from a :class:`dict`. Operates the same as
  258. :meth:`from_data`, but forces interpretation of the data as a
  259. Mapping.
  260. """
  261. return cls.from_data(data=data, headers=headers,
  262. max_depth=max_depth, _data_type=DictInputType(),
  263. metadata=metadata)
  264. @classmethod
  265. def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None):
  266. """Create a Table from a :class:`list`. Operates the same as
  267. :meth:`from_data`, but forces the interpretation of the data
  268. as a Sequence.
  269. """
  270. return cls.from_data(data=data, headers=headers,
  271. max_depth=max_depth, _data_type=ListInputType(),
  272. metadata=metadata)
  273. @classmethod
  274. def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None):
  275. """Create a Table from an :class:`object`. Operates the same as
  276. :meth:`from_data`, but forces the interpretation of the data
  277. as an object. May be useful for some :class:`dict` and
  278. :class:`list` subtypes.
  279. """
  280. return cls.from_data(data=data, headers=headers,
  281. max_depth=max_depth, _data_type=ObjectInputType(),
  282. metadata=metadata)
  283. @classmethod
  284. def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs):
  285. """Create a Table from any supported data, heuristically
  286. selecting how to represent the data in Table format.
  287. Args:
  288. data (object): Any object or iterable with data to be
  289. imported to the Table.
  290. headers (iterable): An iterable of headers to be matched
  291. to the data. If not explicitly passed, headers will be
  292. guessed for certain datatypes.
  293. max_depth (int): The level to which nested Tables should
  294. be created (default: 1).
  295. _data_type (InputType subclass): For advanced use cases,
  296. do not guess the type of the input data, use this data
  297. type instead.
  298. """
  299. # TODO: seen/cycle detection/reuse ?
  300. # maxdepth follows the same behavior as find command
  301. # i.e., it doesn't work if max_depth=0 is passed in
  302. metadata = kwargs.pop('metadata', None)
  303. _data_type = kwargs.pop('_data_type', None)
  304. if max_depth < 1:
  305. # return data instead?
  306. return cls(headers=headers, metadata=metadata)
  307. is_seq = isinstance(data, Sequence)
  308. if is_seq:
  309. if not data:
  310. return cls(headers=headers, metadata=metadata)
  311. to_check = data[0]
  312. if not _data_type:
  313. for it in cls._input_types:
  314. if it.check_type(to_check):
  315. _data_type = it
  316. break
  317. else:
  318. # not particularly happy about this rewind-y approach
  319. is_seq = False
  320. to_check = data
  321. else:
  322. if type(data) in _DNR:
  323. # hmm, got scalar data.
  324. # raise an exception or make an exception, nahmsayn?
  325. return cls([[data]], headers=headers, metadata=metadata)
  326. to_check = data
  327. if not _data_type:
  328. for it in cls._input_types:
  329. if it.check_type(to_check):
  330. _data_type = it
  331. break
  332. else:
  333. raise UnsupportedData('unsupported data type %r'
  334. % type(data))
  335. if headers is _MISSING:
  336. headers = _data_type.guess_headers(to_check)
  337. if is_seq:
  338. entries = _data_type.get_entry_seq(data, headers)
  339. else:
  340. entries = [_data_type.get_entry(data, headers)]
  341. if max_depth > 1:
  342. new_max_depth = max_depth - 1
  343. for i, entry in enumerate(entries):
  344. for j, cell in enumerate(entry):
  345. if type(cell) in _DNR:
  346. # optimization to avoid function overhead
  347. continue
  348. try:
  349. entries[i][j] = cls.from_data(cell,
  350. max_depth=new_max_depth)
  351. except UnsupportedData:
  352. continue
  353. return cls(entries, headers=headers, metadata=metadata)
  354. def __len__(self):
  355. return len(self._data)
  356. def __getitem__(self, idx):
  357. return self._data[idx]
  358. def __repr__(self):
  359. cn = self.__class__.__name__
  360. if self.headers:
  361. return '%s(headers=%r, data=%r)' % (cn, self.headers, self._data)
  362. else:
  363. return '%s(%r)' % (cn, self._data)
  364. def to_html(self, orientation=None, wrapped=True,
  365. with_headers=True, with_newlines=True,
  366. with_metadata=False, max_depth=1):
  367. """Render this Table to HTML. Configure the structure of Table
  368. HTML by subclassing and overriding ``_html_*`` class
  369. attributes.
  370. Args:
  371. orientation (str): one of 'auto', 'horizontal', or
  372. 'vertical' (or the first letter of any of
  373. those). Default 'auto'.
  374. wrapped (bool): whether or not to include the wrapping
  375. '<table></table>' tags. Default ``True``, set to
  376. ``False`` if appending multiple Table outputs or an
  377. otherwise customized HTML wrapping tag is needed.
  378. with_newlines (bool): Set to ``True`` if output should
  379. include added newlines to make the HTML more
  380. readable. Default ``False``.
  381. with_metadata (bool/str): Set to ``True`` if output should
  382. be preceded with a Table of preset metadata, if it
  383. exists. Set to special value ``'bottom'`` if the
  384. metadata Table HTML should come *after* the main HTML output.
  385. max_depth (int): Indicate how deeply to nest HTML tables
  386. before simply reverting to :func:`repr`-ing the nested
  387. data.
  388. Returns:
  389. A text string of the HTML of the rendered table.
  390. """
  391. lines = []
  392. headers = []
  393. if with_metadata and self.metadata:
  394. metadata_table = Table.from_data(self.metadata,
  395. max_depth=max_depth)
  396. metadata_html = metadata_table.to_html(with_headers=True,
  397. with_newlines=with_newlines,
  398. with_metadata=False,
  399. max_depth=max_depth)
  400. if with_metadata != 'bottom':
  401. lines.append(metadata_html)
  402. lines.append('<br />')
  403. if with_headers and self.headers:
  404. headers.extend(self.headers)
  405. headers.extend([None] * (self._width - len(self.headers)))
  406. if wrapped:
  407. lines.append(self._html_table_tag)
  408. orientation = orientation or 'auto'
  409. ol = orientation[0].lower()
  410. if ol == 'a':
  411. ol = 'h' if len(self) > 1 else 'v'
  412. if ol == 'h':
  413. self._add_horizontal_html_lines(lines, headers=headers,
  414. max_depth=max_depth)
  415. elif ol == 'v':
  416. self._add_vertical_html_lines(lines, headers=headers,
  417. max_depth=max_depth)
  418. else:
  419. raise ValueError("expected one of 'auto', 'vertical', or"
  420. " 'horizontal', not %r" % orientation)
  421. if with_metadata and self.metadata and with_metadata == 'bottom':
  422. lines.append('<br />')
  423. lines.append(metadata_html)
  424. if wrapped:
  425. lines.append(self._html_table_tag_close)
  426. sep = '\n' if with_newlines else ''
  427. return sep.join(lines)
  428. def get_cell_html(self, value):
  429. """Called on each value in an HTML table. By default it simply escapes
  430. the HTML. Override this method to add additional conditions
  431. and behaviors, but take care to ensure the final output is
  432. HTML escaped.
  433. """
  434. return escape_html(value)
  435. def _add_horizontal_html_lines(self, lines, headers, max_depth):
  436. esc = self.get_cell_html
  437. new_depth = max_depth - 1 if max_depth > 1 else max_depth
  438. if max_depth > 1:
  439. new_depth = max_depth - 1
  440. if headers:
  441. _thth = self._html_th_close + self._html_th
  442. lines.append(self._html_thead)
  443. lines.append(self._html_tr + self._html_th +
  444. _thth.join([esc(h) for h in headers]) +
  445. self._html_th_close + self._html_tr_close)
  446. lines.append(self._html_thead_close)
  447. trtd, _tdtd, _td_tr = (self._html_tr + self._html_td,
  448. self._html_td_close + self._html_td,
  449. self._html_td_close + self._html_tr_close)
  450. lines.append(self._html_tbody)
  451. for row in self._data:
  452. if max_depth > 1:
  453. _fill_parts = []
  454. for cell in row:
  455. if isinstance(cell, Table):
  456. _fill_parts.append(cell.to_html(max_depth=new_depth))
  457. else:
  458. _fill_parts.append(esc(cell))
  459. else:
  460. _fill_parts = [esc(c) for c in row]
  461. lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr]))
  462. lines.append(self._html_tbody_close)
  463. def _add_vertical_html_lines(self, lines, headers, max_depth):
  464. esc = self.get_cell_html
  465. new_depth = max_depth - 1 if max_depth > 1 else max_depth
  466. tr, th, _th = self._html_tr, self._html_th, self._html_th_close
  467. td, _tdtd = self._html_td, self._html_td_close + self._html_td
  468. _td_tr = self._html_td_close + self._html_tr_close
  469. for i in range(self._width):
  470. line_parts = [tr]
  471. if headers:
  472. line_parts.extend([th, esc(headers[i]), _th])
  473. if max_depth > 1:
  474. new_depth = max_depth - 1
  475. _fill_parts = []
  476. for row in self._data:
  477. cell = row[i]
  478. if isinstance(cell, Table):
  479. _fill_parts.append(cell.to_html(max_depth=new_depth))
  480. else:
  481. _fill_parts.append(esc(row[i]))
  482. else:
  483. _fill_parts = [esc(row[i]) for row in self._data]
  484. line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr])
  485. lines.append(''.join(line_parts))
  486. def to_text(self, with_headers=True, maxlen=None):
  487. """Get the Table's textual representation. Only works well
  488. for Tables with non-recursive data.
  489. Args:
  490. with_headers (bool): Whether to include a header row at the top.
  491. maxlen (int): Max length of data in each cell.
  492. """
  493. lines = []
  494. widths = []
  495. headers = list(self.headers)
  496. text_data = [[to_text(cell, maxlen=maxlen) for cell in row]
  497. for row in self._data]
  498. for idx in range(self._width):
  499. cur_widths = [len(cur) for cur in text_data]
  500. if with_headers:
  501. cur_widths.append(len(to_text(headers[idx], maxlen=maxlen)))
  502. widths.append(max(cur_widths))
  503. if with_headers:
  504. lines.append(' | '.join([h.center(widths[i])
  505. for i, h in enumerate(headers)]))
  506. lines.append('-|-'.join(['-' * w for w in widths]))
  507. for row in text_data:
  508. lines.append(' | '.join([cell.center(widths[j])
  509. for j, cell in enumerate(row)]))
  510. return '\n'.join(lines)