formatutils.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2013, Mahmoud Hashemi
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are
  6. # met:
  7. #
  8. # * Redistributions of source code must retain the above copyright
  9. # notice, this list of conditions and the following disclaimer.
  10. #
  11. # * Redistributions in binary form must reproduce the above
  12. # copyright notice, this list of conditions and the following
  13. # disclaimer in the documentation and/or other materials provided
  14. # with the distribution.
  15. #
  16. # * The names of the contributors may not be used to endorse or
  17. # promote products derived from this software without specific
  18. # prior written permission.
  19. #
  20. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. """`PEP 3101`_ introduced the :meth:`str.format` method, and what
  32. would later be called "new-style" string formatting. For the sake of
  33. explicit correctness, it is probably best to refer to Python's dual
  34. string formatting capabilities as *bracket-style* and
  35. *percent-style*. There is overlap, but one does not replace the
  36. other.
  37. * Bracket-style is more pluggable, slower, and uses a method.
  38. * Percent-style is simpler, faster, and uses an operator.
Bracket-style formatting brought with it a much more powerful toolbox,
but it was far from a full one. :meth:`str.format` uses `more powerful
syntax`_, but `the tools and idioms`_ for working with
that syntax are neither well-developed nor well-advertised.
  43. ``formatutils`` adds several functions for working with bracket-style
  44. format strings:
  45. * :class:`DeferredValue`: Defer fetching or calculating a value
  46. until format time.
  47. * :func:`get_format_args`: Parse the positional and keyword
  48. arguments out of a format string.
  49. * :func:`tokenize_format_str`: Tokenize a format string into
  50. literals and :class:`BaseFormatField` objects.
  51. * :func:`construct_format_field_str`: Assists in programmatic
  52. construction of format strings.
  53. * :func:`infer_positional_format_args`: Converts anonymous
  54. references in 2.7+ format strings to explicit positional arguments
  55. suitable for usage with Python 2.6.
  56. .. _more powerful syntax: https://docs.python.org/2/library/string.html#format-string-syntax
  57. .. _the tools and idioms: https://docs.python.org/2/library/string.html#string-formatting
  58. .. _PEP 3101: https://www.python.org/dev/peps/pep-3101/
  59. """
  60. # TODO: also include percent-formatting utils?
  61. # TODO: include lithoxyl.formatters.Formatter (or some adaptation)?
  62. from __future__ import print_function
  63. import re
  64. from string import Formatter
  65. try:
  66. unicode # Python 2
  67. except NameError:
  68. unicode = str # Python 3
  69. __all__ = ['DeferredValue', 'get_format_args', 'tokenize_format_str',
  70. 'construct_format_field_str', 'infer_positional_format_args',
  71. 'BaseFormatField']
  72. _pos_farg_re = re.compile('({{)|' # escaped open-brace
  73. '(}})|' # escaped close-brace
  74. r'({[:!.\[}])') # anon positional format arg
  75. def construct_format_field_str(fname, fspec, conv):
  76. """
  77. Constructs a format field string from the field name, spec, and
  78. conversion character (``fname``, ``fspec``, ``conv``). See Python
  79. String Formatting for more info.
  80. """
  81. if fname is None:
  82. return ''
  83. ret = '{' + fname
  84. if conv:
  85. ret += '!' + conv
  86. if fspec:
  87. ret += ':' + fspec
  88. ret += '}'
  89. return ret
  90. def split_format_str(fstr):
  91. """Does very basic splitting of a format string, returns a list of
  92. strings. For full tokenization, see :func:`tokenize_format_str`.
  93. """
  94. ret = []
  95. for lit, fname, fspec, conv in Formatter().parse(fstr):
  96. if fname is None:
  97. ret.append((lit, None))
  98. continue
  99. field_str = construct_format_field_str(fname, fspec, conv)
  100. ret.append((lit, field_str))
  101. return ret
  102. def infer_positional_format_args(fstr):
  103. """Takes format strings with anonymous positional arguments, (e.g.,
  104. "{}" and {:d}), and converts them into numbered ones for explicitness and
  105. compatibility with 2.6.
  106. Returns a string with the inferred positional arguments.
  107. """
  108. # TODO: memoize
  109. ret, max_anon = '', 0
  110. # look for {: or {! or {. or {[ or {}
  111. start, end, prev_end = 0, 0, 0
  112. for match in _pos_farg_re.finditer(fstr):
  113. start, end, group = match.start(), match.end(), match.group()
  114. if prev_end < start:
  115. ret += fstr[prev_end:start]
  116. prev_end = end
  117. if group == '{{' or group == '}}':
  118. ret += group
  119. continue
  120. ret += '{%s%s' % (max_anon, group[1:])
  121. max_anon += 1
  122. ret += fstr[prev_end:]
  123. return ret
  124. # This approach is hardly exhaustive but it works for most builtins
  125. _INTCHARS = 'bcdoxXn'
  126. _FLOATCHARS = 'eEfFgGn%'
  127. _TYPE_MAP = dict([(x, int) for x in _INTCHARS] +
  128. [(x, float) for x in _FLOATCHARS])
  129. _TYPE_MAP['s'] = str
  130. def get_format_args(fstr):
  131. """
  132. Turn a format string into two lists of arguments referenced by the
  133. format string. One is positional arguments, and the other is named
  134. arguments. Each element of the list includes the name and the
  135. nominal type of the field.
  136. # >>> get_format_args("{noun} is {1:d} years old{punct}")
  137. # ([(1, <type 'int'>)], [('noun', <type 'str'>), ('punct', <type 'str'>)])
  138. # XXX: Py3k
  139. >>> get_format_args("{noun} is {1:d} years old{punct}") == \
  140. ([(1, int)], [('noun', str), ('punct', str)])
  141. True
  142. """
  143. # TODO: memoize
  144. formatter = Formatter()
  145. fargs, fkwargs, _dedup = [], [], set()
  146. def _add_arg(argname, type_char='s'):
  147. if argname not in _dedup:
  148. _dedup.add(argname)
  149. argtype = _TYPE_MAP.get(type_char, str) # TODO: unicode
  150. try:
  151. fargs.append((int(argname), argtype))
  152. except ValueError:
  153. fkwargs.append((argname, argtype))
  154. for lit, fname, fspec, conv in formatter.parse(fstr):
  155. if fname is not None:
  156. type_char = fspec[-1:]
  157. fname_list = re.split('[.[]', fname)
  158. if len(fname_list) > 1:
  159. raise ValueError('encountered compound format arg: %r' % fname)
  160. try:
  161. base_fname = fname_list[0]
  162. assert base_fname
  163. except (IndexError, AssertionError):
  164. raise ValueError('encountered anonymous positional argument')
  165. _add_arg(fname, type_char)
  166. for sublit, subfname, _, _ in formatter.parse(fspec):
  167. # TODO: positional and anon args not allowed here.
  168. if subfname is not None:
  169. _add_arg(subfname)
  170. return fargs, fkwargs
  171. def tokenize_format_str(fstr, resolve_pos=True):
  172. """Takes a format string, turns it into a list of alternating string
  173. literals and :class:`BaseFormatField` tokens. By default, also
  174. infers anonymous positional references into explicit, numbered
  175. positional references. To disable this behavior set *resolve_pos*
  176. to ``False``.
  177. """
  178. ret = []
  179. if resolve_pos:
  180. fstr = infer_positional_format_args(fstr)
  181. formatter = Formatter()
  182. for lit, fname, fspec, conv in formatter.parse(fstr):
  183. if lit:
  184. ret.append(lit)
  185. if fname is None:
  186. continue
  187. ret.append(BaseFormatField(fname, fspec, conv))
  188. return ret
  189. class BaseFormatField(object):
  190. """A class representing a reference to an argument inside of a
  191. bracket-style format string. For instance, in ``"{greeting},
  192. world!"``, there is a field named "greeting".
  193. These fields can have many options applied to them. See the
  194. Python docs on `Format String Syntax`_ for the full details.
  195. .. _Format String Syntax: https://docs.python.org/2/library/string.html#string-formatting
  196. """
  197. def __init__(self, fname, fspec='', conv=None):
  198. self.set_fname(fname)
  199. self.set_fspec(fspec)
  200. self.set_conv(conv)
  201. def set_fname(self, fname):
  202. "Set the field name."
  203. path_list = re.split('[.[]', fname) # TODO
  204. self.base_name = path_list[0]
  205. self.fname = fname
  206. self.subpath = path_list[1:]
  207. self.is_positional = not self.base_name or self.base_name.isdigit()
  208. def set_fspec(self, fspec):
  209. "Set the field spec."
  210. fspec = fspec or ''
  211. subfields = []
  212. for sublit, subfname, _, _ in Formatter().parse(fspec):
  213. if subfname is not None:
  214. subfields.append(subfname)
  215. self.subfields = subfields
  216. self.fspec = fspec
  217. self.type_char = fspec[-1:]
  218. self.type_func = _TYPE_MAP.get(self.type_char, str)
  219. def set_conv(self, conv):
  220. """There are only two built-in converters: ``s`` and ``r``. They are
  221. somewhat rare and appearlike ``"{ref!r}"``."""
  222. # TODO
  223. self.conv = conv
  224. self.conv_func = None # TODO
  225. @property
  226. def fstr(self):
  227. "The current state of the field in string format."
  228. return construct_format_field_str(self.fname, self.fspec, self.conv)
  229. def __repr__(self):
  230. cn = self.__class__.__name__
  231. args = [self.fname]
  232. if self.conv is not None:
  233. args.extend([self.fspec, self.conv])
  234. elif self.fspec != '':
  235. args.append(self.fspec)
  236. args_repr = ', '.join([repr(a) for a in args])
  237. return '%s(%s)' % (cn, args_repr)
  238. def __str__(self):
  239. return self.fstr
  240. _UNSET = object()
  241. class DeferredValue(object):
  242. """:class:`DeferredValue` is a wrapper type, used to defer computing
  243. values which would otherwise be expensive to stringify and
  244. format. This is most valuable in areas like logging, where one
  245. would not want to waste time formatting a value for a log message
  246. which will subsequently be filtered because the message's log
  247. level was DEBUG and the logger was set to only emit CRITICAL
  248. messages.
  249. The :class:``DeferredValue`` is initialized with a callable that
  250. takes no arguments and returns the value, which can be of any
  251. type. By default DeferredValue only calls that callable once, and
  252. future references will get a cached value. This behavior can be
  253. disabled by setting *cache_value* to ``False``.
  254. Args:
  255. func (function): A callable that takes no arguments and
  256. computes the value being represented.
  257. cache_value (bool): Whether subsequent usages will call *func*
  258. again. Defaults to ``True``.
  259. >>> import sys
  260. >>> dv = DeferredValue(lambda: len(sys._current_frames()))
  261. >>> output = "works great in all {0} threads!".format(dv)
  262. PROTIP: To keep lines shorter, use: ``from formatutils import
  263. DeferredValue as DV``
  264. """
  265. def __init__(self, func, cache_value=True):
  266. self.func = func
  267. self.cache_value = cache_value
  268. self._value = _UNSET
  269. def get_value(self):
  270. """Computes, optionally caches, and returns the value of the
  271. *func*. If ``get_value()`` has been called before, a cached
  272. value may be returned depending on the *cache_value* option
  273. passed to the constructor.
  274. """
  275. if self._value is not _UNSET and self.cache_value:
  276. value = self._value
  277. else:
  278. value = self.func()
  279. if self.cache_value:
  280. self._value = value
  281. return value
  282. def __int__(self):
  283. return int(self.get_value())
  284. def __float__(self):
  285. return float(self.get_value())
  286. def __str__(self):
  287. return str(self.get_value())
  288. def __unicode__(self):
  289. return unicode(self.get_value())
  290. def __repr__(self):
  291. return repr(self.get_value())
  292. def __format__(self, fmt):
  293. value = self.get_value()
  294. pt = fmt[-1:] # presentation type
  295. type_conv = _TYPE_MAP.get(pt, str)
  296. try:
  297. return value.__format__(fmt)
  298. except (ValueError, TypeError):
  299. # TODO: this may be overkill
  300. return type_conv(value).__format__(fmt)
  301. # end formatutils.py