# -*- coding: utf-8 -*- # Copyright (c) 2013, Mahmoud Hashemi # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * The names of the contributors may not be used to endorse or # promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """`PEP 3101`_ introduced the :meth:`str.format` method, and what would later be called "new-style" string formatting. For the sake of explicit correctness, it is probably best to refer to Python's dual string formatting capabilities as *bracket-style* and *percent-style*. There is overlap, but one does not replace the other. * Bracket-style is more pluggable, slower, and uses a method. * Percent-style is simpler, faster, and uses an operator. Bracket-style formatting brought with it a much more powerful toolbox, but it was far from a full one. :meth:`str.format` uses `more powerful syntax`_, but `the tools and idioms`_ for working with that syntax are not well-developed nor well-advertised. ``formatutils`` adds several functions for working with bracket-style format strings: * :class:`DeferredValue`: Defer fetching or calculating a value until format time. * :func:`get_format_args`: Parse the positional and keyword arguments out of a format string. * :func:`tokenize_format_str`: Tokenize a format string into literals and :class:`BaseFormatField` objects. * :func:`construct_format_field_str`: Assists in programmatic construction of format strings. * :func:`infer_positional_format_args`: Converts anonymous references in 2.7+ format strings to explicit positional arguments suitable for usage with Python 2.6. .. _more powerful syntax: https://docs.python.org/2/library/string.html#format-string-syntax .. _the tools and idioms: https://docs.python.org/2/library/string.html#string-formatting .. _PEP 3101: https://www.python.org/dev/peps/pep-3101/ """ # TODO: also include percent-formatting utils? # TODO: include lithoxyl.formatters.Formatter (or some adaptation)? from __future__ import print_function import re from string import Formatter try: unicode # Python 2 except NameError: unicode = str # Python 3 __all__ = ['DeferredValue', 'get_format_args', 'tokenize_format_str', 'construct_format_field_str', 'infer_positional_format_args', 'BaseFormatField'] _pos_farg_re = re.compile('({{)|' # escaped open-brace '(}})|' # escaped close-brace r'({[:!.\[}])') # anon positional format arg def construct_format_field_str(fname, fspec, conv): """ Constructs a format field string from the field name, spec, and conversion character (``fname``, ``fspec``, ``conv``). See Python String Formatting for more info. """ if fname is None: return '' ret = '{' + fname if conv: ret += '!' + conv if fspec: ret += ':' + fspec ret += '}' return ret def split_format_str(fstr): """Does very basic splitting of a format string, returns a list of strings. For full tokenization, see :func:`tokenize_format_str`. """ ret = [] for lit, fname, fspec, conv in Formatter().parse(fstr): if fname is None: ret.append((lit, None)) continue field_str = construct_format_field_str(fname, fspec, conv) ret.append((lit, field_str)) return ret def infer_positional_format_args(fstr): """Takes format strings with anonymous positional arguments, (e.g., "{}" and {:d}), and converts them into numbered ones for explicitness and compatibility with 2.6. Returns a string with the inferred positional arguments. """ # TODO: memoize ret, max_anon = '', 0 # look for {: or {! or {. or {[ or {} start, end, prev_end = 0, 0, 0 for match in _pos_farg_re.finditer(fstr): start, end, group = match.start(), match.end(), match.group() if prev_end < start: ret += fstr[prev_end:start] prev_end = end if group == '{{' or group == '}}': ret += group continue ret += '{%s%s' % (max_anon, group[1:]) max_anon += 1 ret += fstr[prev_end:] return ret # This approach is hardly exhaustive but it works for most builtins _INTCHARS = 'bcdoxXn' _FLOATCHARS = 'eEfFgGn%' _TYPE_MAP = dict([(x, int) for x in _INTCHARS] + [(x, float) for x in _FLOATCHARS]) _TYPE_MAP['s'] = str def get_format_args(fstr): """ Turn a format string into two lists of arguments referenced by the format string. One is positional arguments, and the other is named arguments. Each element of the list includes the name and the nominal type of the field. # >>> get_format_args("{noun} is {1:d} years old{punct}") # ([(1, )], [('noun', ), ('punct', )]) # XXX: Py3k >>> get_format_args("{noun} is {1:d} years old{punct}") == \ ([(1, int)], [('noun', str), ('punct', str)]) True """ # TODO: memoize formatter = Formatter() fargs, fkwargs, _dedup = [], [], set() def _add_arg(argname, type_char='s'): if argname not in _dedup: _dedup.add(argname) argtype = _TYPE_MAP.get(type_char, str) # TODO: unicode try: fargs.append((int(argname), argtype)) except ValueError: fkwargs.append((argname, argtype)) for lit, fname, fspec, conv in formatter.parse(fstr): if fname is not None: type_char = fspec[-1:] fname_list = re.split('[.[]', fname) if len(fname_list) > 1: raise ValueError('encountered compound format arg: %r' % fname) try: base_fname = fname_list[0] assert base_fname except (IndexError, AssertionError): raise ValueError('encountered anonymous positional argument') _add_arg(fname, type_char) for sublit, subfname, _, _ in formatter.parse(fspec): # TODO: positional and anon args not allowed here. if subfname is not None: _add_arg(subfname) return fargs, fkwargs def tokenize_format_str(fstr, resolve_pos=True): """Takes a format string, turns it into a list of alternating string literals and :class:`BaseFormatField` tokens. By default, also infers anonymous positional references into explicit, numbered positional references. To disable this behavior set *resolve_pos* to ``False``. """ ret = [] if resolve_pos: fstr = infer_positional_format_args(fstr) formatter = Formatter() for lit, fname, fspec, conv in formatter.parse(fstr): if lit: ret.append(lit) if fname is None: continue ret.append(BaseFormatField(fname, fspec, conv)) return ret class BaseFormatField(object): """A class representing a reference to an argument inside of a bracket-style format string. For instance, in ``"{greeting}, world!"``, there is a field named "greeting". These fields can have many options applied to them. See the Python docs on `Format String Syntax`_ for the full details. .. _Format String Syntax: https://docs.python.org/2/library/string.html#string-formatting """ def __init__(self, fname, fspec='', conv=None): self.set_fname(fname) self.set_fspec(fspec) self.set_conv(conv) def set_fname(self, fname): "Set the field name." path_list = re.split('[.[]', fname) # TODO self.base_name = path_list[0] self.fname = fname self.subpath = path_list[1:] self.is_positional = not self.base_name or self.base_name.isdigit() def set_fspec(self, fspec): "Set the field spec." fspec = fspec or '' subfields = [] for sublit, subfname, _, _ in Formatter().parse(fspec): if subfname is not None: subfields.append(subfname) self.subfields = subfields self.fspec = fspec self.type_char = fspec[-1:] self.type_func = _TYPE_MAP.get(self.type_char, str) def set_conv(self, conv): """There are only two built-in converters: ``s`` and ``r``. They are somewhat rare and appearlike ``"{ref!r}"``.""" # TODO self.conv = conv self.conv_func = None # TODO @property def fstr(self): "The current state of the field in string format." return construct_format_field_str(self.fname, self.fspec, self.conv) def __repr__(self): cn = self.__class__.__name__ args = [self.fname] if self.conv is not None: args.extend([self.fspec, self.conv]) elif self.fspec != '': args.append(self.fspec) args_repr = ', '.join([repr(a) for a in args]) return '%s(%s)' % (cn, args_repr) def __str__(self): return self.fstr _UNSET = object() class DeferredValue(object): """:class:`DeferredValue` is a wrapper type, used to defer computing values which would otherwise be expensive to stringify and format. This is most valuable in areas like logging, where one would not want to waste time formatting a value for a log message which will subsequently be filtered because the message's log level was DEBUG and the logger was set to only emit CRITICAL messages. The :class:``DeferredValue`` is initialized with a callable that takes no arguments and returns the value, which can be of any type. By default DeferredValue only calls that callable once, and future references will get a cached value. This behavior can be disabled by setting *cache_value* to ``False``. Args: func (function): A callable that takes no arguments and computes the value being represented. cache_value (bool): Whether subsequent usages will call *func* again. Defaults to ``True``. >>> import sys >>> dv = DeferredValue(lambda: len(sys._current_frames())) >>> output = "works great in all {0} threads!".format(dv) PROTIP: To keep lines shorter, use: ``from formatutils import DeferredValue as DV`` """ def __init__(self, func, cache_value=True): self.func = func self.cache_value = cache_value self._value = _UNSET def get_value(self): """Computes, optionally caches, and returns the value of the *func*. If ``get_value()`` has been called before, a cached value may be returned depending on the *cache_value* option passed to the constructor. """ if self._value is not _UNSET and self.cache_value: value = self._value else: value = self.func() if self.cache_value: self._value = value return value def __int__(self): return int(self.get_value()) def __float__(self): return float(self.get_value()) def __str__(self): return str(self.get_value()) def __unicode__(self): return unicode(self.get_value()) def __repr__(self): return repr(self.get_value()) def __format__(self, fmt): value = self.get_value() pt = fmt[-1:] # presentation type type_conv = _TYPE_MAP.get(pt, str) try: return value.__format__(fmt) except (ValueError, TypeError): # TODO: this may be overkill return type_conv(value).__format__(fmt) # end formatutils.py