zipimport.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
  1. """zipimport provides support for importing Python modules from Zip archives.
  2. This module exports three objects:
  3. - zipimporter: a class; its constructor takes a path to a Zip archive.
  4. - ZipImportError: exception raised by zipimporter objects. It's a
  5. subclass of ImportError, so it can be caught as ImportError, too.
  6. - _zip_directory_cache: a dict, mapping archive paths to zip directory
  7. info dicts, as used in zipimporter._files.
  8. It is usually not needed to use the zipimport module explicitly; it is
  9. used by the builtin import mechanism for sys.path items that are paths
  10. to Zip archives.
  11. """
  12. #from importlib import _bootstrap_external
  13. #from importlib import _bootstrap # for _verbose_message
  14. import _frozen_importlib_external as _bootstrap_external
  15. from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
  16. import _frozen_importlib as _bootstrap # for _verbose_message
  17. import _imp # for check_hash_based_pycs
  18. import _io # for open
  19. import marshal # for loads
  20. import sys # for modules
  21. import time # for mktime
# Public names of this module.
__all__ = ['ZipImportError', 'zipimporter']


# Primary path separator, as defined by the import machinery.
path_sep = _bootstrap_external.path_sep
# Every entry of path_separators after the first one.  When this string is
# non-empty, zipimporter.__init__() and zipimporter.get_data() replace it
# with path_sep in the paths they are given.
alt_path_sep = _bootstrap_external.path_separators[1:]
  25. class ZipImportError(ImportError):
  26. pass
# _read_directory() cache: maps an archive path to the files dict that
# _read_directory() built for it (filled in by zipimporter.__init__()).
_zip_directory_cache = {}

# The module type, used by load_module() to create new module objects.
_module_type = type(sys)

# Number of bytes read when looking for the End of Central Directory record.
END_CENTRAL_DIR_SIZE = 22
# Signature that starts the End of Central Directory record.
STRING_END_ARCHIVE = b'PK\x05\x06'
# Upper bound on the trailing archive comment; bounds how far back
# _read_directory() searches for STRING_END_ARCHIVE.
MAX_COMMENT_LEN = (1 << 16) - 1
  33. class zipimporter:
  34. """zipimporter(archivepath) -> zipimporter object
  35. Create a new zipimporter instance. 'archivepath' must be a path to
  36. a zipfile, or to a specific path inside a zipfile. For example, it can be
  37. '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
  38. valid directory inside the archive.
  39. 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip
  40. archive.
  41. The 'archive' attribute of zipimporter objects contains the name of the
  42. zipfile targeted.
  43. """
  44. # Split the "subdirectory" from the Zip archive path, lookup a matching
  45. # entry in sys.path_importer_cache, fetch the file directory from there
  46. # if found, or else read it from the archive.
  47. def __init__(self, path):
  48. if not isinstance(path, str):
  49. import os
  50. path = os.fsdecode(path)
  51. if not path:
  52. raise ZipImportError('archive path is empty', path=path)
  53. if alt_path_sep:
  54. path = path.replace(alt_path_sep, path_sep)
  55. prefix = []
  56. while True:
  57. try:
  58. st = _bootstrap_external._path_stat(path)
  59. except (OSError, ValueError):
  60. # On Windows a ValueError is raised for too long paths.
  61. # Back up one path element.
  62. dirname, basename = _bootstrap_external._path_split(path)
  63. if dirname == path:
  64. raise ZipImportError('not a Zip file', path=path)
  65. path = dirname
  66. prefix.append(basename)
  67. else:
  68. # it exists
  69. if (st.st_mode & 0o170000) != 0o100000: # stat.S_ISREG
  70. # it's a not file
  71. raise ZipImportError('not a Zip file', path=path)
  72. break
  73. try:
  74. files = _zip_directory_cache[path]
  75. except KeyError:
  76. files = _read_directory(path)
  77. _zip_directory_cache[path] = files
  78. self._files = files
  79. self.archive = path
  80. # a prefix directory following the ZIP file path.
  81. self.prefix = _bootstrap_external._path_join(*prefix[::-1])
  82. if self.prefix:
  83. self.prefix += path_sep
  84. # Check whether we can satisfy the import of the module named by
  85. # 'fullname', or whether it could be a portion of a namespace
  86. # package. Return self if we can load it, a string containing the
  87. # full path if it's a possible namespace portion, None if we
  88. # can't load it.
  89. def find_loader(self, fullname, path=None):
  90. """find_loader(fullname, path=None) -> self, str or None.
  91. Search for a module specified by 'fullname'. 'fullname' must be the
  92. fully qualified (dotted) module name. It returns the zipimporter
  93. instance itself if the module was found, a string containing the
  94. full path name if it's possibly a portion of a namespace package,
  95. or None otherwise. The optional 'path' argument is ignored -- it's
  96. there for compatibility with the importer protocol.
  97. """
  98. mi = _get_module_info(self, fullname)
  99. if mi is not None:
  100. # This is a module or package.
  101. return self, []
  102. # Not a module or regular package. See if this is a directory, and
  103. # therefore possibly a portion of a namespace package.
  104. # We're only interested in the last path component of fullname
  105. # earlier components are recorded in self.prefix.
  106. modpath = _get_module_path(self, fullname)
  107. if _is_dir(self, modpath):
  108. # This is possibly a portion of a namespace
  109. # package. Return the string representing its path,
  110. # without a trailing separator.
  111. return None, [f'{self.archive}{path_sep}{modpath}']
  112. return None, []
  113. # Check whether we can satisfy the import of the module named by
  114. # 'fullname'. Return self if we can, None if we can't.
  115. def find_module(self, fullname, path=None):
  116. """find_module(fullname, path=None) -> self or None.
  117. Search for a module specified by 'fullname'. 'fullname' must be the
  118. fully qualified (dotted) module name. It returns the zipimporter
  119. instance itself if the module was found, or None if it wasn't.
  120. The optional 'path' argument is ignored -- it's there for compatibility
  121. with the importer protocol.
  122. """
  123. return self.find_loader(fullname, path)[0]
  124. def get_code(self, fullname):
  125. """get_code(fullname) -> code object.
  126. Return the code object for the specified module. Raise ZipImportError
  127. if the module couldn't be found.
  128. """
  129. code, ispackage, modpath = _get_module_code(self, fullname)
  130. return code
  131. def get_data(self, pathname):
  132. """get_data(pathname) -> string with file data.
  133. Return the data associated with 'pathname'. Raise OSError if
  134. the file wasn't found.
  135. """
  136. if alt_path_sep:
  137. pathname = pathname.replace(alt_path_sep, path_sep)
  138. key = pathname
  139. if pathname.startswith(self.archive + path_sep):
  140. key = pathname[len(self.archive + path_sep):]
  141. try:
  142. toc_entry = self._files[key]
  143. except KeyError:
  144. raise OSError(0, '', key)
  145. return _get_data(self.archive, toc_entry)
  146. # Return a string matching __file__ for the named module
  147. def get_filename(self, fullname):
  148. """get_filename(fullname) -> filename string.
  149. Return the filename for the specified module.
  150. """
  151. # Deciding the filename requires working out where the code
  152. # would come from if the module was actually loaded
  153. code, ispackage, modpath = _get_module_code(self, fullname)
  154. return modpath
  155. def get_source(self, fullname):
  156. """get_source(fullname) -> source string.
  157. Return the source code for the specified module. Raise ZipImportError
  158. if the module couldn't be found, return None if the archive does
  159. contain the module, but has no source for it.
  160. """
  161. mi = _get_module_info(self, fullname)
  162. if mi is None:
  163. raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
  164. path = _get_module_path(self, fullname)
  165. if mi:
  166. fullpath = _bootstrap_external._path_join(path, '__init__.py')
  167. else:
  168. fullpath = f'{path}.py'
  169. try:
  170. toc_entry = self._files[fullpath]
  171. except KeyError:
  172. # we have the module, but no source
  173. return None
  174. return _get_data(self.archive, toc_entry).decode()
  175. # Return a bool signifying whether the module is a package or not.
  176. def is_package(self, fullname):
  177. """is_package(fullname) -> bool.
  178. Return True if the module specified by fullname is a package.
  179. Raise ZipImportError if the module couldn't be found.
  180. """
  181. mi = _get_module_info(self, fullname)
  182. if mi is None:
  183. raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
  184. return mi
  185. # Load and return the module named by 'fullname'.
  186. def load_module(self, fullname):
  187. """load_module(fullname) -> module.
  188. Load the module specified by 'fullname'. 'fullname' must be the
  189. fully qualified (dotted) module name. It returns the imported
  190. module, or raises ZipImportError if it wasn't found.
  191. """
  192. code, ispackage, modpath = _get_module_code(self, fullname)
  193. mod = sys.modules.get(fullname)
  194. if mod is None or not isinstance(mod, _module_type):
  195. mod = _module_type(fullname)
  196. sys.modules[fullname] = mod
  197. mod.__loader__ = self
  198. try:
  199. if ispackage:
  200. # add __path__ to the module *before* the code gets
  201. # executed
  202. path = _get_module_path(self, fullname)
  203. fullpath = _bootstrap_external._path_join(self.archive, path)
  204. mod.__path__ = [fullpath]
  205. if not hasattr(mod, '__builtins__'):
  206. mod.__builtins__ = __builtins__
  207. _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
  208. exec(code, mod.__dict__)
  209. except:
  210. del sys.modules[fullname]
  211. raise
  212. try:
  213. mod = sys.modules[fullname]
  214. except KeyError:
  215. raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
  216. _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
  217. return mod
  218. def get_resource_reader(self, fullname):
  219. """Return the ResourceReader for a package in a zip file.
  220. If 'fullname' is a package within the zip file, return the
  221. 'ResourceReader' object for the package. Otherwise return None.
  222. """
  223. try:
  224. if not self.is_package(fullname):
  225. return None
  226. except ZipImportError:
  227. return None
  228. if not _ZipImportResourceReader._registered:
  229. from importlib.abc import ResourceReader
  230. ResourceReader.register(_ZipImportResourceReader)
  231. _ZipImportResourceReader._registered = True
  232. return _ZipImportResourceReader(self, fullname)
  233. def __repr__(self):
  234. return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
# _zip_searchorder defines how we search for a module in the Zip
# archive: we first search for a package __init__, then for
# non-package .pyc, and .py entries.  Each entry is a
# (suffix, isbytecode, ispackage) tuple, which is how _get_module_info()
# and _get_module_code() unpack it.  The suffix is appended to the module
# path; within each pair the .pyc candidate is tried before the .py one.
_zip_searchorder = (
    (path_sep + '__init__.pyc', True, True),
    (path_sep + '__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
  246. # Given a module name, return the potential file path in the
  247. # archive (without extension).
  248. def _get_module_path(self, fullname):
  249. return self.prefix + fullname.rpartition('.')[2]
  250. # Does this path represent a directory?
  251. def _is_dir(self, path):
  252. # See if this is a "directory". If so, it's eligible to be part
  253. # of a namespace package. We test by seeing if the name, with an
  254. # appended path separator, exists.
  255. dirpath = path + path_sep
  256. # If dirpath is present in self._files, we have a directory.
  257. return dirpath in self._files
  258. # Return some information about a module.
  259. def _get_module_info(self, fullname):
  260. path = _get_module_path(self, fullname)
  261. for suffix, isbytecode, ispackage in _zip_searchorder:
  262. fullpath = path + suffix
  263. if fullpath in self._files:
  264. return ispackage
  265. return None
# implementation

# _read_directory(archive) -> files dict (new reference)
#
# Given a path to a Zip archive, build a dict, mapping file names
# (local to the archive, using path_sep as a separator) to toc entries.
#
# A toc_entry is a tuple:
#
# (__file__,      # value to use for __file__, available for all files:
#                 # the archive path joined with the entry name
#  compress,      # compression kind; 0 for uncompressed
#  data_size,     # size of compressed data on disk
#  file_size,     # size of decompressed data
#  file_offset,   # offset of file header from start of archive
#  time,          # mod time of file (in dos format)
#  date,          # mod date of file (in dos format)
#  crc,           # crc checksum of the data
# )
#
# Directories can be recognized by the trailing path_sep in the name,
# data_size and file_offset are 0.
#
# Raises ZipImportError when the archive cannot be opened or read, or when
# its End of Central Directory record or offsets are inconsistent, and
# EOFError when a central directory header is truncated.
def _read_directory(archive):
    try:
        fp = _io.open_code(archive)
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # First guess: the End of Central Directory record is the last
        # END_CENTRAL_DIR_SIZE bytes of the file (i.e. there is no comment).
        try:
            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
            header_position = fp.tell()
            buffer = fp.read(END_CENTRAL_DIR_SIZE)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if len(buffer) != END_CENTRAL_DIR_SIZE:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if buffer[:4] != STRING_END_ARCHIVE:
            # Bad: End of Central Dir signature
            # Check if there's a comment.
            try:
                fp.seek(0, 2)
                file_size = fp.tell()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            # Read the tail of the file that could hold the record plus a
            # comment of maximum length, and search it backwards for the
            # signature.
            max_comment_start = max(file_size - MAX_COMMENT_LEN -
                                    END_CENTRAL_DIR_SIZE, 0)
            try:
                fp.seek(max_comment_start)
                data = fp.read()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            pos = data.rfind(STRING_END_ARCHIVE)
            if pos < 0:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)
            buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
            if len(buffer) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                     path=archive)
            # Absolute file position of the record that was just found.
            header_position = file_size - len(data) + pos

        # Size and recorded offset of the central directory.
        header_size = _unpack_uint32(buffer[12:16])
        header_offset = _unpack_uint32(buffer[16:20])
        if header_position < header_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if header_position < header_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        # From here on header_position is where the central directory starts
        # in the file; arc_offset is the difference between that position
        # and the offset recorded in the archive, and is added to every
        # local header offset below.
        header_position -= header_size
        arc_offset = header_position - header_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        # Start of Central Directory
        count = 0
        try:
            fp.seek(header_position)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        while True:
            # Fixed-size part of one central directory file header.
            buffer = fp.read(46)
            if len(buffer) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if buffer[:4] != b'PK\x01\x02':
                break                                # Bad: Central Dir File Header
            if len(buffer) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(buffer[8:10])
            compress = _unpack_uint16(buffer[10:12])
            time = _unpack_uint16(buffer[12:14])
            date = _unpack_uint16(buffer[14:16])
            crc = _unpack_uint32(buffer[16:20])
            data_size = _unpack_uint32(buffer[20:24])
            file_size = _unpack_uint32(buffer[24:28])
            name_size = _unpack_uint16(buffer[28:30])
            extra_size = _unpack_uint16(buffer[30:32])
            comment_size = _unpack_uint16(buffer[32:34])
            file_offset = _unpack_uint32(buffer[42:46])
            # Total length of the variable-size fields after the fixed part.
            header_size = name_size + extra_size + comment_size
            if file_offset > header_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if len(name) != name_size:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers.    See issue #8745.
            try:
                if len(fp.read(header_size - name_size)) != header_size - name_size:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    name = name.decode('latin1').translate(cp437_table)

            # Keys of the files dict use the platform separator.
            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            t = (path, compress, data_size, file_size, file_offset, time, date, crc)
            files[name] = t
            _bootstrap._verbose_message('zipimport: name : {}', name)
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
# During bootstrap, we may need to load the encodings
# package from a ZIP file. But the cp437 encoding is implemented
# in Python in the encodings package.
#
# Break out of this dependency by using the translation table for
# the cp437 encoding.
#
# _read_directory() decodes non-ASCII, non-UTF-8 names as latin1 and then
# passes them through str.translate() with this table, so the character at
# index i is the replacement for code point i.
cp437_table = (
    # ASCII part, 8 rows x 16 chars
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
  433. _importing_zlib = False
  434. # Return the zlib.decompress function object, or NULL if zlib couldn't
  435. # be imported. The function is cached when found, so subsequent calls
  436. # don't import zlib again.
  437. def _get_decompress_func():
  438. global _importing_zlib
  439. if _importing_zlib:
  440. # Someone has a zlib.py[co] in their Zip file
  441. # let's avoid a stack overflow.
  442. _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
  443. raise ZipImportError("can't decompress data; zlib not available")
  444. _importing_zlib = True
  445. try:
  446. from zlib import decompress
  447. except Exception:
  448. _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
  449. raise ZipImportError("can't decompress data; zlib not available")
  450. finally:
  451. _importing_zlib = False
  452. _bootstrap._verbose_message('zipimport: zlib available')
  453. return decompress
  454. # Given a path to a Zip file and a toc_entry, return the (uncompressed) data.
  455. def _get_data(archive, toc_entry):
  456. datapath, compress, data_size, file_size, file_offset, time, date, crc = toc_entry
  457. if data_size < 0:
  458. raise ZipImportError('negative data size')
  459. with _io.open_code(archive) as fp:
  460. # Check to make sure the local file header is correct
  461. try:
  462. fp.seek(file_offset)
  463. except OSError:
  464. raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
  465. buffer = fp.read(30)
  466. if len(buffer) != 30:
  467. raise EOFError('EOF read where not expected')
  468. if buffer[:4] != b'PK\x03\x04':
  469. # Bad: Local File Header
  470. raise ZipImportError(f'bad local file header: {archive!r}', path=archive)
  471. name_size = _unpack_uint16(buffer[26:28])
  472. extra_size = _unpack_uint16(buffer[28:30])
  473. header_size = 30 + name_size + extra_size
  474. file_offset += header_size # Start of file data
  475. try:
  476. fp.seek(file_offset)
  477. except OSError:
  478. raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
  479. raw_data = fp.read(data_size)
  480. if len(raw_data) != data_size:
  481. raise OSError("zipimport: can't read data")
  482. if compress == 0:
  483. # data is not compressed
  484. return raw_data
  485. # Decompress with zlib
  486. try:
  487. decompress = _get_decompress_func()
  488. except Exception:
  489. raise ZipImportError("can't decompress data; zlib not available")
  490. return decompress(raw_data, -15)
  491. # Lenient date/time comparison function. The precision of the mtime
  492. # in the archive is lower than the mtime stored in a .pyc: we
  493. # must allow a difference of at most one second.
  494. def _eq_mtime(t1, t2):
  495. # dostime only stores even seconds, so be lenient
  496. return abs(t1 - t2) <= 1
  497. # Given the contents of a .py[co] file, unmarshal the data
  498. # and return the code object. Return None if it the magic word doesn't
  499. # match, or if the recorded .py[co] metadata does not match the source,
  500. # (we do this instead of raising an exception as we fall back
  501. # to .py if available and we don't want to mask other errors).
  502. def _unmarshal_code(self, pathname, fullpath, fullname, data):
  503. exc_details = {
  504. 'name': fullname,
  505. 'path': fullpath,
  506. }
  507. try:
  508. flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)
  509. except ImportError:
  510. return None
  511. hash_based = flags & 0b1 != 0
  512. if hash_based:
  513. check_source = flags & 0b10 != 0
  514. if (_imp.check_hash_based_pycs != 'never' and
  515. (check_source or _imp.check_hash_based_pycs == 'always')):
  516. source_bytes = _get_pyc_source(self, fullpath)
  517. if source_bytes is not None:
  518. source_hash = _imp.source_hash(
  519. _bootstrap_external._RAW_MAGIC_NUMBER,
  520. source_bytes,
  521. )
  522. try:
  523. _bootstrap_external._validate_hash_pyc(
  524. data, source_hash, fullname, exc_details)
  525. except ImportError:
  526. return None
  527. else:
  528. source_mtime, source_size = \
  529. _get_mtime_and_size_of_source(self, fullpath)
  530. if source_mtime:
  531. # We don't use _bootstrap_external._validate_timestamp_pyc
  532. # to allow for a more lenient timestamp check.
  533. if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or
  534. _unpack_uint32(data[12:16]) != source_size):
  535. _bootstrap._verbose_message(
  536. f'bytecode is stale for {fullname!r}')
  537. return None
  538. code = marshal.loads(data[16:])
  539. if not isinstance(code, _code_type):
  540. raise TypeError(f'compiled module {pathname!r} is not a code object')
  541. return code
  542. _code_type = type(_unmarshal_code.__code__)
  543. # Replace any occurrences of '\r\n?' in the input string with '\n'.
  544. # This converts DOS and Mac line endings to Unix line endings.
  545. def _normalize_line_endings(source):
  546. source = source.replace(b'\r\n', b'\n')
  547. source = source.replace(b'\r', b'\n')
  548. return source
  549. # Given a string buffer containing Python source code, compile it
  550. # and return a code object.
  551. def _compile_source(pathname, source):
  552. source = _normalize_line_endings(source)
  553. return compile(source, pathname, 'exec', dont_inherit=True)
  554. # Convert the date/time values found in the Zip archive to a value
  555. # that's compatible with the time stamp stored in .pyc files.
  556. def _parse_dostime(d, t):
  557. return time.mktime((
  558. (d >> 9) + 1980, # bits 9..15: year
  559. (d >> 5) & 0xF, # bits 5..8: month
  560. d & 0x1F, # bits 0..4: day
  561. t >> 11, # bits 11..15: hours
  562. (t >> 5) & 0x3F, # bits 8..10: minutes
  563. (t & 0x1F) * 2, # bits 0..7: seconds / 2
  564. -1, -1, -1))
  565. # Given a path to a .pyc file in the archive, return the
  566. # modification time of the matching .py file and its size,
  567. # or (0, 0) if no source is available.
  568. def _get_mtime_and_size_of_source(self, path):
  569. try:
  570. # strip 'c' or 'o' from *.py[co]
  571. assert path[-1:] in ('c', 'o')
  572. path = path[:-1]
  573. toc_entry = self._files[path]
  574. # fetch the time stamp of the .py file for comparison
  575. # with an embedded pyc time stamp
  576. time = toc_entry[5]
  577. date = toc_entry[6]
  578. uncompressed_size = toc_entry[3]
  579. return _parse_dostime(date, time), uncompressed_size
  580. except (KeyError, IndexError, TypeError):
  581. return 0, 0
  582. # Given a path to a .pyc file in the archive, return the
  583. # contents of the matching .py file, or None if no source
  584. # is available.
  585. def _get_pyc_source(self, path):
  586. # strip 'c' or 'o' from *.py[co]
  587. assert path[-1:] in ('c', 'o')
  588. path = path[:-1]
  589. try:
  590. toc_entry = self._files[path]
  591. except KeyError:
  592. return None
  593. else:
  594. return _get_data(self.archive, toc_entry)
  595. # Get the code object associated with the module specified by
  596. # 'fullname'.
  597. def _get_module_code(self, fullname):
  598. path = _get_module_path(self, fullname)
  599. for suffix, isbytecode, ispackage in _zip_searchorder:
  600. fullpath = path + suffix
  601. _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
  602. try:
  603. toc_entry = self._files[fullpath]
  604. except KeyError:
  605. pass
  606. else:
  607. modpath = toc_entry[0]
  608. data = _get_data(self.archive, toc_entry)
  609. if isbytecode:
  610. code = _unmarshal_code(self, modpath, fullpath, fullname, data)
  611. else:
  612. code = _compile_source(modpath, data)
  613. if code is None:
  614. # bad magic number or non-matching mtime
  615. # in byte code, try next
  616. continue
  617. modpath = toc_entry[0]
  618. return code, ispackage, modpath
  619. else:
  620. raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
  621. class _ZipImportResourceReader:
  622. """Private class used to support ZipImport.get_resource_reader().
  623. This class is allowed to reference all the innards and private parts of
  624. the zipimporter.
  625. """
  626. _registered = False
  627. def __init__(self, zipimporter, fullname):
  628. self.zipimporter = zipimporter
  629. self.fullname = fullname
  630. def open_resource(self, resource):
  631. fullname_as_path = self.fullname.replace('.', '/')
  632. path = f'{fullname_as_path}/{resource}'
  633. from io import BytesIO
  634. try:
  635. return BytesIO(self.zipimporter.get_data(path))
  636. except OSError:
  637. raise FileNotFoundError(path)
  638. def resource_path(self, resource):
  639. # All resources are in the zip file, so there is no path to the file.
  640. # Raising FileNotFoundError tells the higher level API to extract the
  641. # binary data and create a temporary file.
  642. raise FileNotFoundError
  643. def is_resource(self, name):
  644. # Maybe we could do better, but if we can get the data, it's a
  645. # resource. Otherwise it isn't.
  646. fullname_as_path = self.fullname.replace('.', '/')
  647. path = f'{fullname_as_path}/{name}'
  648. try:
  649. self.zipimporter.get_data(path)
  650. except OSError:
  651. return False
  652. return True
  653. def contents(self):
  654. # This is a bit convoluted, because fullname will be a module path,
  655. # but _files is a list of file names relative to the top of the
  656. # archive's namespace. We want to compare file paths to find all the
  657. # names of things inside the module represented by fullname. So we
  658. # turn the module path of fullname into a file path relative to the
  659. # top of the archive, and then we iterate through _files looking for
  660. # names inside that "directory".
  661. from pathlib import Path
  662. fullname_path = Path(self.zipimporter.get_filename(self.fullname))
  663. relative_path = fullname_path.relative_to(self.zipimporter.archive)
  664. # Don't forget that fullname names a package, so its path will include
  665. # __init__.py, which we want to ignore.
  666. assert relative_path.name == '__init__.py'
  667. package_path = relative_path.parent
  668. subdirs_seen = set()
  669. for filename in self.zipimporter._files:
  670. try:
  671. relative = Path(filename).relative_to(package_path)
  672. except ValueError:
  673. continue
  674. # If the path of the file (which is relative to the top of the zip
  675. # namespace), relative to the package given when the resource
  676. # reader was created, has a parent, then it's a name in a
  677. # subdirectory and thus we skip it.
  678. parent_name = relative.parent.name
  679. if len(parent_name) == 0:
  680. yield relative.name
  681. elif parent_name not in subdirs_seen:
  682. subdirs_seen.add(parent_name)
  683. yield parent_name