gcutils.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2013, Mahmoud Hashemi
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are
  6. # met:
  7. #
  8. # * Redistributions of source code must retain the above copyright
  9. # notice, this list of conditions and the following disclaimer.
  10. #
  11. # * Redistributions in binary form must reproduce the above
  12. # copyright notice, this list of conditions and the following
  13. # disclaimer in the documentation and/or other materials provided
  14. # with the distribution.
  15. #
  16. # * The names of the contributors may not be used to endorse or
  17. # promote products derived from this software without specific
  18. # prior written permission.
  19. #
  20. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. """The Python Garbage Collector (`GC`_) doesn't usually get too much
  32. attention, probably because:
  33. - Python's `reference counting`_ effectively handles the vast majority of
  34. unused objects
  35. - People are slowly learning to avoid implementing `object.__del__()`_
  36. - The collection itself strikes a good balance between simplicity and
  37. power (`tunable generation sizes`_)
  38. - The collector itself is fast and rarely the cause of long pauses
  39. associated with GC in other runtimes
  40. Even so, for many applications, the time will come when the developer
  41. will need to track down:
  42. - Circular references
  43. - Misbehaving objects (locks, ``__del__()``)
  44. - Memory leaks
  45. - Or just ways to shave off a couple percent of execution time
  46. Thanks to the :mod:`gc` module, the GC is a well-instrumented entry
  47. point for exactly these tasks, and ``gcutils`` aims to facilitate it
  48. further.
  49. .. _GC: https://docs.python.org/2/glossary.html#term-garbage-collection
  50. .. _reference counting: https://docs.python.org/2/glossary.html#term-reference-count
  51. .. _object.__del__(): https://docs.python.org/2/reference/datamodel.html#object.__del__
  52. .. _tunable generation sizes: https://docs.python.org/2/library/gc.html#gc.set_threshold
  53. """
  54. # TODO: type survey
  55. from __future__ import print_function
  56. import gc
  57. import sys
  58. __all__ = ['get_all', 'GCToggler', 'toggle_gc', 'toggle_gc_postcollect']
  59. def get_all(type_obj, include_subtypes=True):
  60. """Get a list containing all instances of a given type. This will
  61. work for the vast majority of types out there.
  62. >>> class Ratking(object): pass
  63. >>> wiki, hak, sport = Ratking(), Ratking(), Ratking()
  64. >>> len(get_all(Ratking))
  65. 3
  66. However, there are some exceptions. For example, ``get_all(bool)``
  67. returns an empty list because ``True`` and ``False`` are
  68. themselves built-in and not tracked.
  69. >>> get_all(bool)
  70. []
  71. Still, it's not hard to see how this functionality can be used to
  72. find all instances of a leaking type and track them down further
  73. using :func:`gc.get_referrers` and :func:`gc.get_referents`.
  74. ``get_all()`` is optimized such that getting instances of
  75. user-created types is quite fast. Setting *include_subtypes* to
  76. ``False`` will further increase performance in cases where
  77. instances of subtypes aren't required.
  78. .. note::
  79. There are no guarantees about the state of objects returned by
  80. ``get_all()``, especially in concurrent environments. For
  81. instance, it is possible for an object to be in the middle of
  82. executing its ``__init__()`` and be only partially constructed.
  83. """
  84. # TODO: old-style classes
  85. if not isinstance(type_obj, type):
  86. raise TypeError('expected a type, not %r' % type_obj)
  87. try:
  88. type_is_tracked = gc.is_tracked(type_obj)
  89. except AttributeError:
  90. type_is_tracked = False # Python 2.6 and below don't get the speedup
  91. if type_is_tracked:
  92. to_check = gc.get_referrers(type_obj)
  93. else:
  94. to_check = gc.get_objects()
  95. if include_subtypes:
  96. ret = [x for x in to_check if isinstance(x, type_obj)]
  97. else:
  98. ret = [x for x in to_check if type(x) is type_obj]
  99. return ret
  100. _IS_PYPY = '__pypy__' in sys.builtin_module_names
  101. if _IS_PYPY:
  102. # pypy's gc is just different, y'all
  103. del get_all
  104. class GCToggler(object):
  105. """The ``GCToggler`` is a context-manager that allows one to safely
  106. take more control of your garbage collection schedule. Anecdotal
  107. experience says certain object-creation-heavy tasks see speedups
  108. of around 10% by simply doing one explicit collection at the very
  109. end, especially if most of the objects will stay resident.
  110. Two GCTogglers are already present in the ``gcutils`` module:
  111. - :data:`toggle_gc` simply turns off GC at context entrance, and
  112. re-enables at exit
  113. - :data:`toggle_gc_postcollect` does the same, but triggers an
  114. explicit collection after re-enabling.
  115. >>> with toggle_gc:
  116. ... x = [object() for i in range(1000)]
  117. Between those two instances, the ``GCToggler`` type probably won't
  118. be used much directly, but is documented for inheritance purposes.
  119. """
  120. def __init__(self, postcollect=False):
  121. self.postcollect = postcollect
  122. def __enter__(self):
  123. gc.disable()
  124. def __exit__(self, exc_type, exc_val, exc_tb):
  125. gc.enable()
  126. if self.postcollect:
  127. gc.collect()
  128. toggle_gc = GCToggler()
  129. """A context manager for disabling GC for a code block. See
  130. :class:`GCToggler` for more details."""
  131. toggle_gc_postcollect = GCToggler(postcollect=True)
  132. """A context manager for disabling GC for a code block, and collecting
  133. before re-enabling. See :class:`GCToggler` for more details."""