123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600 |
- # -*- coding: utf-8 -*-
- # Copyright (c) 2013, Mahmoud Hashemi
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are
- # met:
- #
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- #
- # * Redistributions in binary form must reproduce the above
- # copyright notice, this list of conditions and the following
- # disclaimer in the documentation and/or other materials provided
- # with the distribution.
- #
- # * The names of the contributors may not be used to endorse or
- # promote products derived from this software without specific
- # prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- """If there is one recurring theme in ``boltons``, it is that Python
- has excellent datastructures that constitute a good foundation for
- most quick manipulations, as well as building applications. However,
- Python usage has grown much faster than builtin data structure
- power. Python has a growing need for more advanced general-purpose
- data structures which behave intuitively.
- The :class:`Table` class is one example. When handed one- or
- two-dimensional data, it can provide useful, if basic, text and HTML
- renditions of small to medium sized data. It also heuristically
- handles recursive data of various formats (lists, dicts, namedtuples,
- objects).
- For more advanced :class:`Table`-style manipulation check out the
- `pandas`_ DataFrame.
- .. _pandas: http://pandas.pydata.org/
- """
- from __future__ import print_function
- try:
- from html import escape as html_escape
- except ImportError:
- from cgi import escape as html_escape
- import types
- from itertools import islice
- try:
- from collections.abc import Sequence, Mapping, MutableSequence
- except ImportError:
- from collections import Sequence, Mapping, MutableSequence
- try:
- string_types, integer_types = (str, unicode), (int, long)
- from cgi import escape as html_escape
- except NameError:
- # Python 3 compat
- unicode = str
- string_types, integer_types = (str, bytes), (int,)
- from html import escape as html_escape
- try:
- from .typeutils import make_sentinel
- _MISSING = make_sentinel(var_name='_MISSING')
- except ImportError:
- _MISSING = object()
- """
- Some idle feature thoughts:
- * shift around column order without rearranging data
- * gotta make it so you can add additional items, not just initialize with
- * maybe a shortcut would be to allow adding of Tables to other Tables
- * what's the perf of preallocating lists and overwriting items versus
- starting from empty?
- * is it possible to effectively tell the difference between when a
- Table is from_data()'d with a single row (list) or with a list of lists?
- * CSS: white-space pre-line or pre-wrap maybe?
- * Would be nice to support different backends (currently uses lists
- exclusively). Sometimes large datasets come in list-of-dicts and
- list-of-tuples format and it's desirable to cut down processing overhead.
- TODO: make iterable on rows?
- """
- __all__ = ['Table']
- def to_text(obj, maxlen=None):
- try:
- text = unicode(obj)
- except Exception:
- try:
- text = unicode(repr(obj))
- except Exception:
- text = unicode(object.__repr__(obj))
- if maxlen and len(text) > maxlen:
- text = text[:maxlen - 3] + '...'
- # TODO: inverse of ljust/rjust/center
- return text
- def escape_html(obj, maxlen=None):
- text = to_text(obj, maxlen=maxlen)
- return html_escape(text, quote=True)
- _DNR = set((type(None), bool, complex, float,
- type(NotImplemented), slice,
- types.FunctionType, types.MethodType, types.BuiltinFunctionType,
- types.GeneratorType) + string_types + integer_types)
- class UnsupportedData(TypeError):
- pass
- class InputType(object):
- def __init__(self, *a, **kw):
- pass
- def get_entry_seq(self, data_seq, headers):
- return [self.get_entry(entry, headers) for entry in data_seq]
- class DictInputType(InputType):
- def check_type(self, obj):
- return isinstance(obj, Mapping)
- def guess_headers(self, obj):
- return sorted(obj.keys())
- def get_entry(self, obj, headers):
- return [obj.get(h) for h in headers]
- def get_entry_seq(self, obj, headers):
- return [[ci.get(h) for h in headers] for ci in obj]
- class ObjectInputType(InputType):
- def check_type(self, obj):
- return type(obj) not in _DNR and hasattr(obj, '__class__')
- def guess_headers(self, obj):
- headers = []
- for attr in dir(obj):
- # an object's __dict__ could technically have non-string keys
- try:
- val = getattr(obj, attr)
- except Exception:
- # seen on greenlet: `run` shows in dir() but raises
- # AttributeError. Also properties misbehave.
- continue
- if callable(val):
- continue
- headers.append(attr)
- return headers
- def get_entry(self, obj, headers):
- values = []
- for h in headers:
- try:
- values.append(getattr(obj, h))
- except Exception:
- values.append(None)
- return values
- # might be better to hardcode list support since it's so close to the
- # core or might be better to make this the copy-style from_* importer
- # and have the non-copy style be hardcoded in __init__
- class ListInputType(InputType):
- def check_type(self, obj):
- return isinstance(obj, MutableSequence)
- def guess_headers(self, obj):
- return None
- def get_entry(self, obj, headers):
- return obj
- def get_entry_seq(self, obj_seq, headers):
- return obj_seq
- class TupleInputType(InputType):
- def check_type(self, obj):
- return isinstance(obj, tuple)
- def guess_headers(self, obj):
- return None
- def get_entry(self, obj, headers):
- return list(obj)
- def get_entry_seq(self, obj_seq, headers):
- return [list(t) for t in obj_seq]
- class NamedTupleInputType(InputType):
- def check_type(self, obj):
- return hasattr(obj, '_fields') and isinstance(obj, tuple)
- def guess_headers(self, obj):
- return list(obj._fields)
- def get_entry(self, obj, headers):
- return [getattr(obj, h, None) for h in headers]
- def get_entry_seq(self, obj_seq, headers):
- return [[getattr(obj, h, None) for h in headers] for obj in obj_seq]
- class Table(object):
- """
- This Table class is meant to be simple, low-overhead, and extensible. Its
- most common use would be for translation between in-memory data
- structures and serialization formats, such as HTML and console-ready text.
- As such, it stores data in list-of-lists format, and *does not* copy
- lists passed in. It also reserves the right to modify those lists in a
- "filling" process, whereby short lists are extended to the width of
- the table (usually determined by number of headers). This greatly
- reduces overhead and processing/validation that would have to occur
- otherwise.
- General description of headers behavior:
- Headers describe the columns, but are not part of the data, however,
- if the *headers* argument is omitted, Table tries to infer header
- names from the data. It is possible to have a table with no headers,
- just pass in ``headers=None``.
- Supported inputs:
- * :class:`list` of :class:`list` objects
- * :class:`dict` (list/single)
- * :class:`object` (list/single)
- * :class:`collections.namedtuple` (list/single)
- * TODO: DB API cursor?
- * TODO: json
- Supported outputs:
- * HTML
- * Pretty text (also usable as GF Markdown)
- * TODO: CSV
- * TODO: json
- * TODO: json lines
- To minimize resident size, the Table data is stored as a list of lists.
- """
- # order definitely matters here
- _input_types = [DictInputType(), ListInputType(),
- NamedTupleInputType(), TupleInputType(),
- ObjectInputType()]
- _html_tr, _html_tr_close = '<tr>', '</tr>'
- _html_th, _html_th_close = '<th>', '</th>'
- _html_td, _html_td_close = '<td>', '</td>'
- _html_thead, _html_thead_close = '<thead>', '</thead>'
- _html_tbody, _html_tbody_close = '<tbody>', '</tbody>'
- # _html_tfoot, _html_tfoot_close = '<tfoot>', '</tfoot>'
- _html_table_tag, _html_table_tag_close = '<table>', '</table>'
- def __init__(self, data=None, headers=_MISSING, metadata=None):
- if headers is _MISSING:
- headers = []
- if data:
- headers, data = list(data[0]), islice(data, 1, None)
- self.headers = headers or []
- self.metadata = metadata or {}
- self._data = []
- self._width = 0
- self.extend(data)
- def extend(self, data):
- """
- Append the given data to the end of the Table.
- """
- if not data:
- return
- self._data.extend(data)
- self._set_width()
- self._fill()
- def _set_width(self, reset=False):
- if reset:
- self._width = 0
- if self._width:
- return
- if self.headers:
- self._width = len(self.headers)
- return
- self._width = max([len(d) for d in self._data])
- def _fill(self):
- width, filler = self._width, [None]
- if not width:
- return
- for d in self._data:
- rem = width - len(d)
- if rem > 0:
- d.extend(filler * rem)
- return
- @classmethod
- def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None):
- """Create a Table from a :class:`dict`. Operates the same as
- :meth:`from_data`, but forces interpretation of the data as a
- Mapping.
- """
- return cls.from_data(data=data, headers=headers,
- max_depth=max_depth, _data_type=DictInputType(),
- metadata=metadata)
- @classmethod
- def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None):
- """Create a Table from a :class:`list`. Operates the same as
- :meth:`from_data`, but forces the interpretation of the data
- as a Sequence.
- """
- return cls.from_data(data=data, headers=headers,
- max_depth=max_depth, _data_type=ListInputType(),
- metadata=metadata)
- @classmethod
- def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None):
- """Create a Table from an :class:`object`. Operates the same as
- :meth:`from_data`, but forces the interpretation of the data
- as an object. May be useful for some :class:`dict` and
- :class:`list` subtypes.
- """
- return cls.from_data(data=data, headers=headers,
- max_depth=max_depth, _data_type=ObjectInputType(),
- metadata=metadata)
- @classmethod
- def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs):
- """Create a Table from any supported data, heuristically
- selecting how to represent the data in Table format.
- Args:
- data (object): Any object or iterable with data to be
- imported to the Table.
- headers (iterable): An iterable of headers to be matched
- to the data. If not explicitly passed, headers will be
- guessed for certain datatypes.
- max_depth (int): The level to which nested Tables should
- be created (default: 1).
- _data_type (InputType subclass): For advanced use cases,
- do not guess the type of the input data, use this data
- type instead.
- """
- # TODO: seen/cycle detection/reuse ?
- # maxdepth follows the same behavior as find command
- # i.e., it doesn't work if max_depth=0 is passed in
- metadata = kwargs.pop('metadata', None)
- _data_type = kwargs.pop('_data_type', None)
- if max_depth < 1:
- # return data instead?
- return cls(headers=headers, metadata=metadata)
- is_seq = isinstance(data, Sequence)
- if is_seq:
- if not data:
- return cls(headers=headers, metadata=metadata)
- to_check = data[0]
- if not _data_type:
- for it in cls._input_types:
- if it.check_type(to_check):
- _data_type = it
- break
- else:
- # not particularly happy about this rewind-y approach
- is_seq = False
- to_check = data
- else:
- if type(data) in _DNR:
- # hmm, got scalar data.
- # raise an exception or make an exception, nahmsayn?
- return cls([[data]], headers=headers, metadata=metadata)
- to_check = data
- if not _data_type:
- for it in cls._input_types:
- if it.check_type(to_check):
- _data_type = it
- break
- else:
- raise UnsupportedData('unsupported data type %r'
- % type(data))
- if headers is _MISSING:
- headers = _data_type.guess_headers(to_check)
- if is_seq:
- entries = _data_type.get_entry_seq(data, headers)
- else:
- entries = [_data_type.get_entry(data, headers)]
- if max_depth > 1:
- new_max_depth = max_depth - 1
- for i, entry in enumerate(entries):
- for j, cell in enumerate(entry):
- if type(cell) in _DNR:
- # optimization to avoid function overhead
- continue
- try:
- entries[i][j] = cls.from_data(cell,
- max_depth=new_max_depth)
- except UnsupportedData:
- continue
- return cls(entries, headers=headers, metadata=metadata)
- def __len__(self):
- return len(self._data)
- def __getitem__(self, idx):
- return self._data[idx]
- def __repr__(self):
- cn = self.__class__.__name__
- if self.headers:
- return '%s(headers=%r, data=%r)' % (cn, self.headers, self._data)
- else:
- return '%s(%r)' % (cn, self._data)
- def to_html(self, orientation=None, wrapped=True,
- with_headers=True, with_newlines=True,
- with_metadata=False, max_depth=1):
- """Render this Table to HTML. Configure the structure of Table
- HTML by subclassing and overriding ``_html_*`` class
- attributes.
- Args:
- orientation (str): one of 'auto', 'horizontal', or
- 'vertical' (or the first letter of any of
- those). Default 'auto'.
- wrapped (bool): whether or not to include the wrapping
- '<table></table>' tags. Default ``True``, set to
- ``False`` if appending multiple Table outputs or an
- otherwise customized HTML wrapping tag is needed.
- with_newlines (bool): Set to ``True`` if output should
- include added newlines to make the HTML more
- readable. Default ``False``.
- with_metadata (bool/str): Set to ``True`` if output should
- be preceded with a Table of preset metadata, if it
- exists. Set to special value ``'bottom'`` if the
- metadata Table HTML should come *after* the main HTML output.
- max_depth (int): Indicate how deeply to nest HTML tables
- before simply reverting to :func:`repr`-ing the nested
- data.
- Returns:
- A text string of the HTML of the rendered table.
- """
- lines = []
- headers = []
- if with_metadata and self.metadata:
- metadata_table = Table.from_data(self.metadata,
- max_depth=max_depth)
- metadata_html = metadata_table.to_html(with_headers=True,
- with_newlines=with_newlines,
- with_metadata=False,
- max_depth=max_depth)
- if with_metadata != 'bottom':
- lines.append(metadata_html)
- lines.append('<br />')
- if with_headers and self.headers:
- headers.extend(self.headers)
- headers.extend([None] * (self._width - len(self.headers)))
- if wrapped:
- lines.append(self._html_table_tag)
- orientation = orientation or 'auto'
- ol = orientation[0].lower()
- if ol == 'a':
- ol = 'h' if len(self) > 1 else 'v'
- if ol == 'h':
- self._add_horizontal_html_lines(lines, headers=headers,
- max_depth=max_depth)
- elif ol == 'v':
- self._add_vertical_html_lines(lines, headers=headers,
- max_depth=max_depth)
- else:
- raise ValueError("expected one of 'auto', 'vertical', or"
- " 'horizontal', not %r" % orientation)
- if with_metadata and self.metadata and with_metadata == 'bottom':
- lines.append('<br />')
- lines.append(metadata_html)
- if wrapped:
- lines.append(self._html_table_tag_close)
- sep = '\n' if with_newlines else ''
- return sep.join(lines)
- def get_cell_html(self, value):
- """Called on each value in an HTML table. By default it simply escapes
- the HTML. Override this method to add additional conditions
- and behaviors, but take care to ensure the final output is
- HTML escaped.
- """
- return escape_html(value)
- def _add_horizontal_html_lines(self, lines, headers, max_depth):
- esc = self.get_cell_html
- new_depth = max_depth - 1 if max_depth > 1 else max_depth
- if max_depth > 1:
- new_depth = max_depth - 1
- if headers:
- _thth = self._html_th_close + self._html_th
- lines.append(self._html_thead)
- lines.append(self._html_tr + self._html_th +
- _thth.join([esc(h) for h in headers]) +
- self._html_th_close + self._html_tr_close)
- lines.append(self._html_thead_close)
- trtd, _tdtd, _td_tr = (self._html_tr + self._html_td,
- self._html_td_close + self._html_td,
- self._html_td_close + self._html_tr_close)
- lines.append(self._html_tbody)
- for row in self._data:
- if max_depth > 1:
- _fill_parts = []
- for cell in row:
- if isinstance(cell, Table):
- _fill_parts.append(cell.to_html(max_depth=new_depth))
- else:
- _fill_parts.append(esc(cell))
- else:
- _fill_parts = [esc(c) for c in row]
- lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr]))
- lines.append(self._html_tbody_close)
- def _add_vertical_html_lines(self, lines, headers, max_depth):
- esc = self.get_cell_html
- new_depth = max_depth - 1 if max_depth > 1 else max_depth
- tr, th, _th = self._html_tr, self._html_th, self._html_th_close
- td, _tdtd = self._html_td, self._html_td_close + self._html_td
- _td_tr = self._html_td_close + self._html_tr_close
- for i in range(self._width):
- line_parts = [tr]
- if headers:
- line_parts.extend([th, esc(headers[i]), _th])
- if max_depth > 1:
- new_depth = max_depth - 1
- _fill_parts = []
- for row in self._data:
- cell = row[i]
- if isinstance(cell, Table):
- _fill_parts.append(cell.to_html(max_depth=new_depth))
- else:
- _fill_parts.append(esc(row[i]))
- else:
- _fill_parts = [esc(row[i]) for row in self._data]
- line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr])
- lines.append(''.join(line_parts))
- def to_text(self, with_headers=True, maxlen=None):
- """Get the Table's textual representation. Only works well
- for Tables with non-recursive data.
- Args:
- with_headers (bool): Whether to include a header row at the top.
- maxlen (int): Max length of data in each cell.
- """
- lines = []
- widths = []
- headers = list(self.headers)
- text_data = [[to_text(cell, maxlen=maxlen) for cell in row]
- for row in self._data]
- for idx in range(self._width):
- cur_widths = [len(cur) for cur in text_data]
- if with_headers:
- cur_widths.append(len(to_text(headers[idx], maxlen=maxlen)))
- widths.append(max(cur_widths))
- if with_headers:
- lines.append(' | '.join([h.center(widths[i])
- for i, h in enumerate(headers)]))
- lines.append('-|-'.join(['-' * w for w in widths]))
- for row in text_data:
- lines.append(' | '.join([cell.center(widths[j])
- for j, cell in enumerate(row)]))
- return '\n'.join(lines)
|