upstream/mercurial-mirror Commit - r37729:65a23cc8

cborutil: implement support for streaming encoding, bytestring decoding...

Gregory Szorc -

r37729:65a23cc8 default

parent child

mercurial/utils/cborutil.py

0 created 644 +258 0

			@@ -0,0 +1,258 b''
		1	# cborutil.py - CBOR extensions
		2	#
		3	# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
		4	#
		5	# This software may be used and distributed according to the terms of the
		6	# GNU General Public License version 2 or any later version.
		7
		8	from __future__ import absolute_import
		9
		10	import struct
		11
		12	from ..thirdparty.cbor.cbor2 import (
		13	decoder as decodermod,
		14	)
		15
		16	# Very short version of RFC 7049...
		17	#
		18	# Each item begins with a byte. The 3 high bits of that byte denote the
		19	# "major type." The lower 5 bits denote the "subtype." Each major type
		20	# has its own encoding mechanism.
		21	#
		22	# Most types have lengths. However, bytestring, string, array, and map
		23	# can be indefinite length. These are denoted by a subtype with value 31.
		24	# Sub-components of those types then come afterwards and are terminated
		25	# by a "break" byte.
		26
		27	MAJOR_TYPE_UINT = 0
		28	MAJOR_TYPE_NEGINT = 1
		29	MAJOR_TYPE_BYTESTRING = 2
		30	MAJOR_TYPE_STRING = 3
		31	MAJOR_TYPE_ARRAY = 4
		32	MAJOR_TYPE_MAP = 5
		33	MAJOR_TYPE_SEMANTIC = 6
		34	MAJOR_TYPE_SPECIAL = 7
		35
		36	SUBTYPE_MASK = 0b00011111
		37
		38	SUBTYPE_HALF_FLOAT = 25
		39	SUBTYPE_SINGLE_FLOAT = 26
		40	SUBTYPE_DOUBLE_FLOAT = 27
		41	SUBTYPE_INDEFINITE = 31
		42
		43	# Indefinite types begin with their major type ORd with information value 31.
		44	BEGIN_INDEFINITE_BYTESTRING = struct.pack(
		45	r'>B', MAJOR_TYPE_BYTESTRING << 5 \| SUBTYPE_INDEFINITE)
		46	BEGIN_INDEFINITE_ARRAY = struct.pack(
		47	r'>B', MAJOR_TYPE_ARRAY << 5 \| SUBTYPE_INDEFINITE)
		48	BEGIN_INDEFINITE_MAP = struct.pack(
		49	r'>B', MAJOR_TYPE_MAP << 5 \| SUBTYPE_INDEFINITE)
		50
		51	ENCODED_LENGTH_1 = struct.Struct(r'>B')
		52	ENCODED_LENGTH_2 = struct.Struct(r'>BB')
		53	ENCODED_LENGTH_3 = struct.Struct(r'>BH')
		54	ENCODED_LENGTH_4 = struct.Struct(r'>BL')
		55	ENCODED_LENGTH_5 = struct.Struct(r'>BQ')
		56
		57	# The break ends an indefinite length item.
		58	BREAK = b'\xff'
		59	BREAK_INT = 255
		60
		61	def encodelength(majortype, length):
		62	"""Obtain a value encoding the major type and its length."""
		63	if length < 24:
		64	return ENCODED_LENGTH_1.pack(majortype << 5 \| length)
		65	elif length < 256:
		66	return ENCODED_LENGTH_2.pack(majortype << 5 \| 24, length)
		67	elif length < 65536:
		68	return ENCODED_LENGTH_3.pack(majortype << 5 \| 25, length)
		69	elif length < 4294967296:
		70	return ENCODED_LENGTH_4.pack(majortype << 5 \| 26, length)
		71	else:
		72	return ENCODED_LENGTH_5.pack(majortype << 5 \| 27, length)
		73
		74	def streamencodebytestring(v):
		75	yield encodelength(MAJOR_TYPE_BYTESTRING, len(v))
		76	yield v
		77
		78	def streamencodebytestringfromiter(it):
		79	"""Convert an iterator of chunks to an indefinite bytestring.
		80
		81	Given an input that is iterable and each element in the iterator is
		82	representable as bytes, emit an indefinite length bytestring.
		83	"""
		84	yield BEGIN_INDEFINITE_BYTESTRING
		85
		86	for chunk in it:
		87	yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
		88	yield chunk
		89
		90	yield BREAK
		91
		92	def streamencodeindefinitebytestring(source, chunksize=65536):
		93	"""Given a large source buffer, emit as an indefinite length bytestring.
		94
		95	This is a generator of chunks constituting the encoded CBOR data.
		96	"""
		97	yield BEGIN_INDEFINITE_BYTESTRING
		98
		99	i = 0
		100	l = len(source)
		101
		102	while True:
		103	chunk = source[i:i + chunksize]
		104	i += len(chunk)
		105
		106	yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
		107	yield chunk
		108
		109	if i >= l:
		110	break
		111
		112	yield BREAK
		113
		114	def streamencodeint(v):
		115	if v >= 18446744073709551616 or v < -18446744073709551616:
		116	raise ValueError('big integers not supported')
		117
		118	if v >= 0:
		119	yield encodelength(MAJOR_TYPE_UINT, v)
		120	else:
		121	yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
		122
		123	def streamencodearray(l):
		124	"""Encode a known size iterable to an array."""
		125
		126	yield encodelength(MAJOR_TYPE_ARRAY, len(l))
		127
		128	for i in l:
		129	for chunk in streamencode(i):
		130	yield chunk
		131
		132	def streamencodearrayfromiter(it):
		133	"""Encode an iterator of items to an indefinite length array."""
		134
		135	yield BEGIN_INDEFINITE_ARRAY
		136
		137	for i in it:
		138	for chunk in streamencode(i):
		139	yield chunk
		140
		141	yield BREAK
		142
		143	def streamencodeset(s):
		144	# https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
		145	# semantic tag 258 for finite sets.
		146	yield encodelength(MAJOR_TYPE_SEMANTIC, 258)
		147
		148	for chunk in streamencodearray(sorted(s)):
		149	yield chunk
		150
		151	def streamencodemap(d):
		152	"""Encode dictionary to a generator.
		153
		154	Does not supporting indefinite length dictionaries.
		155	"""
		156	yield encodelength(MAJOR_TYPE_MAP, len(d))
		157
		158	for key, value in sorted(d.iteritems()):
		159	for chunk in streamencode(key):
		160	yield chunk
		161	for chunk in streamencode(value):
		162	yield chunk
		163
		164	def streamencodemapfromiter(it):
		165	"""Given an iterable of (key, value), encode to an indefinite length map."""
		166	yield BEGIN_INDEFINITE_MAP
		167
		168	for key, value in it:
		169	for chunk in streamencode(key):
		170	yield chunk
		171	for chunk in streamencode(value):
		172	yield chunk
		173
		174	yield BREAK
		175
		176	def streamencodebool(b):
		177	# major type 7, simple value 20 and 21.
		178	yield b'\xf5' if b else b'\xf4'
		179
		180	def streamencodenone(v):
		181	# major type 7, simple value 22.
		182	yield b'\xf6'
		183
		184	STREAM_ENCODERS = {
		185	bytes: streamencodebytestring,
		186	int: streamencodeint,
		187	list: streamencodearray,
		188	tuple: streamencodearray,
		189	dict: streamencodemap,
		190	set: streamencodeset,
		191	bool: streamencodebool,
		192	type(None): streamencodenone,
		193	}
		194
		195	def streamencode(v):
		196	"""Encode a value in a streaming manner.
		197
		198	Given an input object, encode it to CBOR recursively.
		199
		200	Returns a generator of CBOR encoded bytes. There is no guarantee
		201	that each emitted chunk fully decodes to a value or sub-value.
		202
		203	Encoding is deterministic - unordered collections are sorted.
		204	"""
		205	fn = STREAM_ENCODERS.get(v.__class__)
		206
		207	if not fn:
		208	raise ValueError('do not know how to encode %s' % type(v))
		209
		210	return fn(v)
		211
		212	def readindefinitebytestringtoiter(fh, expectheader=True):
		213	"""Read an indefinite bytestring to a generator.
		214
		215	Receives an object with a ``read(X)`` method to read N bytes.
		216
		217	If ``expectheader`` is True, it is expected that the first byte read
		218	will represent an indefinite length bytestring. Otherwise, we
		219	expect the first byte to be part of the first bytestring chunk.
		220	"""
		221	read = fh.read
		222	decodeuint = decodermod.decode_uint
		223	byteasinteger = decodermod.byte_as_integer
		224
		225	if expectheader:
		226	initial = decodermod.byte_as_integer(read(1))
		227
		228	majortype = initial >> 5
		229	subtype = initial & SUBTYPE_MASK
		230
		231	if majortype != MAJOR_TYPE_BYTESTRING:
		232	raise decodermod.CBORDecodeError(
		233	'expected major type %d; got %d' % (MAJOR_TYPE_BYTESTRING,
		234	majortype))
		235
		236	if subtype != SUBTYPE_INDEFINITE:
		237	raise decodermod.CBORDecodeError(
		238	'expected indefinite subtype; got %d' % subtype)
		239
		240	# The indefinite bytestring is composed of chunks of normal bytestrings.
		241	# Read chunks until we hit a BREAK byte.
		242
		243	while True:
		244	# We need to sniff for the BREAK byte.
		245	initial = byteasinteger(read(1))
		246
		247	if initial == BREAK_INT:
		248	break
		249
		250	length = decodeuint(fh, initial & SUBTYPE_MASK)
		251	chunk = read(length)
		252
		253	if len(chunk) != length:
		254	raise decodermod.CBORDecodeError(
		255	'failed to read bytestring chunk: got %d bytes; expected %d' % (
		256	len(chunk), length))
		257
		258	yield chunk

tests/test-cbor.py

0 created 644 +210 0

			@@ -0,0 +1,210 b''
		1	from __future__ import absolute_import
		2
		3	import io
		4	import unittest
		5
		6	from mercurial.thirdparty import (
		7	cbor,
		8	)
		9	from mercurial.utils import (
		10	cborutil,
		11	)
		12
		13	def loadit(it):
		14	return cbor.loads(b''.join(it))
		15
		16	class BytestringTests(unittest.TestCase):
		17	def testsimple(self):
		18	self.assertEqual(
		19	list(cborutil.streamencode(b'foobar')),
		20	[b'\x46', b'foobar'])
		21
		22	self.assertEqual(
		23	loadit(cborutil.streamencode(b'foobar')),
		24	b'foobar')
		25
		26	def testlong(self):
		27	source = b'x' * 1048576
		28
		29	self.assertEqual(loadit(cborutil.streamencode(source)), source)
		30
		31	def testfromiter(self):
		32	# This is the example from RFC 7049 Section 2.2.2.
		33	source = [b'\xaa\xbb\xcc\xdd', b'\xee\xff\x99']
		34
		35	self.assertEqual(
		36	list(cborutil.streamencodebytestringfromiter(source)),
		37	[
		38	b'\x5f',
		39	b'\x44',
		40	b'\xaa\xbb\xcc\xdd',
		41	b'\x43',
		42	b'\xee\xff\x99',
		43	b'\xff',
		44	])
		45
		46	self.assertEqual(
		47	loadit(cborutil.streamencodebytestringfromiter(source)),
		48	b''.join(source))
		49
		50	def testfromiterlarge(self):
		51	source = [b'a' * 16, b'b' * 128, b'c' * 1024, b'd' * 1048576]
		52
		53	self.assertEqual(
		54	loadit(cborutil.streamencodebytestringfromiter(source)),
		55	b''.join(source))
		56
		57	def testindefinite(self):
		58	source = b'\x00\x01\x02\x03' + b'\xff' * 16384
		59
		60	it = cborutil.streamencodeindefinitebytestring(source, chunksize=2)
		61
		62	self.assertEqual(next(it), b'\x5f')
		63	self.assertEqual(next(it), b'\x42')
		64	self.assertEqual(next(it), b'\x00\x01')
		65	self.assertEqual(next(it), b'\x42')
		66	self.assertEqual(next(it), b'\x02\x03')
		67	self.assertEqual(next(it), b'\x42')
		68	self.assertEqual(next(it), b'\xff\xff')
		69
		70	dest = b''.join(cborutil.streamencodeindefinitebytestring(
		71	source, chunksize=42))
		72	self.assertEqual(cbor.loads(dest), b''.join(source))
		73
		74	def testreadtoiter(self):
		75	source = io.BytesIO(b'\x5f\x44\xaa\xbb\xcc\xdd\x43\xee\xff\x99\xff')
		76
		77	it = cborutil.readindefinitebytestringtoiter(source)
		78	self.assertEqual(next(it), b'\xaa\xbb\xcc\xdd')
		79	self.assertEqual(next(it), b'\xee\xff\x99')
		80
		81	with self.assertRaises(StopIteration):
		82	next(it)
		83
		84	class IntTests(unittest.TestCase):
		85	def testsmall(self):
		86	self.assertEqual(list(cborutil.streamencode(0)), [b'\x00'])
		87	self.assertEqual(list(cborutil.streamencode(1)), [b'\x01'])
		88	self.assertEqual(list(cborutil.streamencode(2)), [b'\x02'])
		89	self.assertEqual(list(cborutil.streamencode(3)), [b'\x03'])
		90	self.assertEqual(list(cborutil.streamencode(4)), [b'\x04'])
		91
		92	def testnegativesmall(self):
		93	self.assertEqual(list(cborutil.streamencode(-1)), [b'\x20'])
		94	self.assertEqual(list(cborutil.streamencode(-2)), [b'\x21'])
		95	self.assertEqual(list(cborutil.streamencode(-3)), [b'\x22'])
		96	self.assertEqual(list(cborutil.streamencode(-4)), [b'\x23'])
		97	self.assertEqual(list(cborutil.streamencode(-5)), [b'\x24'])
		98
		99	def testrange(self):
		100	for i in range(-70000, 70000, 10):
		101	self.assertEqual(
		102	b''.join(cborutil.streamencode(i)),
		103	cbor.dumps(i))
		104
		105	class ArrayTests(unittest.TestCase):
		106	def testempty(self):
		107	self.assertEqual(list(cborutil.streamencode([])), [b'\x80'])
		108	self.assertEqual(loadit(cborutil.streamencode([])), [])
		109
		110	def testbasic(self):
		111	source = [b'foo', b'bar', 1, -10]
		112
		113	self.assertEqual(list(cborutil.streamencode(source)), [
		114	b'\x84', b'\x43', b'foo', b'\x43', b'bar', b'\x01', b'\x29'])
		115
		116	def testemptyfromiter(self):
		117	self.assertEqual(b''.join(cborutil.streamencodearrayfromiter([])),
		118	b'\x9f\xff')
		119
		120	def testfromiter1(self):
		121	source = [b'foo']
		122
		123	self.assertEqual(list(cborutil.streamencodearrayfromiter(source)), [
		124	b'\x9f',
		125	b'\x43', b'foo',
		126	b'\xff',
		127	])
		128
		129	dest = b''.join(cborutil.streamencodearrayfromiter(source))
		130	self.assertEqual(cbor.loads(dest), source)
		131
		132	def testtuple(self):
		133	source = (b'foo', None, 42)
		134
		135	self.assertEqual(cbor.loads(b''.join(cborutil.streamencode(source))),
		136	list(source))
		137
		138	class SetTests(unittest.TestCase):
		139	def testempty(self):
		140	self.assertEqual(list(cborutil.streamencode(set())), [
		141	b'\xd9\x01\x02',
		142	b'\x80',
		143	])
		144
		145	def testset(self):
		146	source = {b'foo', None, 42}
		147
		148	self.assertEqual(cbor.loads(b''.join(cborutil.streamencode(source))),
		149	source)
		150
		151	class BoolTests(unittest.TestCase):
		152	def testbasic(self):
		153	self.assertEqual(list(cborutil.streamencode(True)), [b'\xf5'])
		154	self.assertEqual(list(cborutil.streamencode(False)), [b'\xf4'])
		155
		156	self.assertIs(loadit(cborutil.streamencode(True)), True)
		157	self.assertIs(loadit(cborutil.streamencode(False)), False)
		158
		159	class NoneTests(unittest.TestCase):
		160	def testbasic(self):
		161	self.assertEqual(list(cborutil.streamencode(None)), [b'\xf6'])
		162
		163	self.assertIs(loadit(cborutil.streamencode(None)), None)
		164
		165	class MapTests(unittest.TestCase):
		166	def testempty(self):
		167	self.assertEqual(list(cborutil.streamencode({})), [b'\xa0'])
		168	self.assertEqual(loadit(cborutil.streamencode({})), {})
		169
		170	def testemptyindefinite(self):
		171	self.assertEqual(list(cborutil.streamencodemapfromiter([])), [
		172	b'\xbf', b'\xff'])
		173
		174	self.assertEqual(loadit(cborutil.streamencodemapfromiter([])), {})
		175
		176	def testone(self):
		177	source = {b'foo': b'bar'}
		178	self.assertEqual(list(cborutil.streamencode(source)), [
		179	b'\xa1', b'\x43', b'foo', b'\x43', b'bar'])
		180
		181	self.assertEqual(loadit(cborutil.streamencode(source)), source)
		182
		183	def testmultiple(self):
		184	source = {
		185	b'foo': b'bar',
		186	b'baz': b'value1',
		187	}
		188
		189	self.assertEqual(loadit(cborutil.streamencode(source)), source)
		190
		191	self.assertEqual(
		192	loadit(cborutil.streamencodemapfromiter(source.items())),
		193	source)
		194
		195	def testcomplex(self):
		196	source = {
		197	b'key': 1,
		198	2: -10,
		199	}
		200
		201	self.assertEqual(loadit(cborutil.streamencode(source)),
		202	source)
		203
		204	self.assertEqual(
		205	loadit(cborutil.streamencodemapfromiter(source.items())),
		206	source)
		207
		208	if __name__ == '__main__':
		209	import silenttestrunner
		210	silenttestrunner.main(__name__)

contrib/import-checker.py

0 +2 0

              #!/usr/bin/env python
              from __future__ import absolute_import, print_function
              import ast
              import collections
              import os
              import re
              import sys
              # Import a minimal set of stdlib modules needed for list_stdlib_modules()
              # to work when run from a virtualenv.  The modules were chosen empirically
              # so that the return value matches the return value without virtualenv.
              if True: # disable lexical sorting checks
                  try:
                      import BaseHTTPServer as basehttpserver
                  except ImportError:
                      basehttpserver = None
                  import zlib
              # Whitelist of modules that symbols can be directly imported from.
              allowsymbolimports = (
                  '__future__',
                  'bzrlib',
                  'hgclient',
                  'mercurial',
                  'mercurial.hgweb.common',
                  'mercurial.hgweb.request',
                  'mercurial.i18n',
                  'mercurial.node',
                  # for cffi modules to re-export pure functions
                  'mercurial.pure.base85',
                  'mercurial.pure.bdiff',
                  'mercurial.pure.mpatch',
                  'mercurial.pure.osutil',
                  'mercurial.pure.parsers',
                  # third-party imports should be directly imported
                  'mercurial.thirdparty',
+                 'mercurial.thirdparty.cbor',
+                 'mercurial.thirdparty.cbor.cbor2',
                  'mercurial.thirdparty.zope',
                  'mercurial.thirdparty.zope.interface',
              )
              # Whitelist of symbols that can be directly imported.
              directsymbols = (
                  'demandimport',
              )
              # Modules that must be aliased because they are commonly confused with
              # common variables and can create aliasing and readability issues.
              requirealias = {
                  'ui': 'uimod',
              }
              def usingabsolute(root):
                  """Whether absolute imports are being used.

                  Always true on Python 3. On Python 2 the tree rooted at ``root`` must
                  contain ``from __future__ import absolute_import``.
                  """
                  if sys.version_info[0] >= 3:
                      return True
                  return any(
                      alias.name == 'absolute_import'
                      for stmt in ast.walk(root)
                      if isinstance(stmt, ast.ImportFrom) and stmt.module == '__future__'
                      for alias in stmt.names)
              def walklocal(root):
                  """Recursively yield all descendant nodes but not in a different scope

                  Yields ``(node, newscope)`` pairs breadth first, starting with
                  ``(root, False)``. ``newscope`` is true for function definitions,
                  whose children are not visited.
                  """
                  pending = collections.deque(ast.iter_child_nodes(root))
                  yield root, False
                  while pending:
                      current = pending.popleft()
                      if isinstance(current, ast.FunctionDef):
                          # A function body is a separate scope: report it, do not enter.
                          yield current, True
                          continue
                      pending.extend(ast.iter_child_nodes(current))
                      yield current, False
              def dotted_name_of_path(path):
                  """Given a relative path to a source file, return its dotted module name.

                  Everything after the first dot of the file name is dropped, as is a
                  trailing ``module`` on the remaining name.

                  >>> dotted_name_of_path('mercurial/error.py')
                  'mercurial.error'
                  >>> dotted_name_of_path('zlibmodule.so')
                  'zlib'
                  """
                  components = path.replace(os.sep, '/').split('/')
                  # remove .py and .so and .ARCH.so
                  leaf = components.pop().split('.', 1)[0]
                  if leaf.endswith('module'):
                      leaf = leaf[:-len('module')]
                  components.append(leaf)
                  return '.'.join(components)
              def fromlocalfunc(modulename, localmods):
                  """Get a function to examine which locally defined module the
                  target source imports via a specified name.

                  `modulename` is an `dotted_name_of_path()`-ed source file path,
                  which may have `.__init__` at the end of it, of the target source.

                  `localmods` is a set of absolute `dotted_name_of_path()`-ed source file
                  paths of locally defined (= Mercurial specific) modules.

                  This function assumes that module names not existing in
                  `localmods` are from the Python standard library.

                  The returned function takes a `name` argument (and an optional
                  relative import `level`) and returns an `(absname, dottedpath,
                  hassubmod)` tuple if `name` matches a locally defined module.
                  Otherwise, it returns False. `name` must not end in `.__init__`.

                  `absname` is the absolute module name of `name` (e.g.
                  "hgext.convert"), usable as a prefix for sub modules.

                  `dottedpath` is the `dotted_name_of_path()`-ed source file path
                  (e.g. "hgext.convert.__init__") of `name`, usable to look the
                  module up in `localmods` again.

                  `hassubmod` is whether it may have sub modules under it (equivalent
                  to "absname != dottedpath").

                  >>> localmods = {'foo.__init__', 'foo.foo1',
                  ...              'foo.bar.__init__', 'foo.bar.bar1',
                  ...              'baz.__init__', 'baz.baz1'}
                  >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
                  >>> # relative
                  >>> fromlocal('foo1')
                  ('foo.foo1', 'foo.foo1', False)
                  >>> fromlocal('bar')
                  ('foo.bar', 'foo.bar.__init__', True)
                  >>> fromlocal('bar.bar1')
                  ('foo.bar.bar1', 'foo.bar.bar1', False)
                  >>> # absolute
                  >>> fromlocal('baz')
                  ('baz', 'baz.__init__', True)
                  >>> fromlocal('baz.baz1')
                  ('baz.baz1', 'baz.baz1', False)
                  >>> # unknown = maybe standard library
                  >>> fromlocal('os')
                  False
                  >>> fromlocal(None, 1)
                  ('foo', 'foo.__init__', True)
                  >>> fromlocal('foo1', 1)
                  ('foo.foo1', 'foo.foo1', False)
                  >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
                  >>> fromlocal2(None, 2)
                  ('foo', 'foo.__init__', True)
                  >>> fromlocal2('bar2', 1)
                  False
                  >>> fromlocal2('bar', 2)
                  ('foo.bar', 'foo.bar.__init__', True)
                  """
                  if not isinstance(modulename, str):
                      modulename = modulename.decode('ascii')
                  segments = modulename.split('.')
                  parent = '.'.join(segments[:-1])
                  relprefix = parent + '.' if parent else ''

                  def fromlocal(name, level=0):
                      if name and not level:
                          # Check relative name first.
                          candidates = [relprefix + name, name]
                      else:
                          if not name:
                              # name is false value when relative imports are used,
                              # in which case level must not be absolute.
                              assert level > 0
                          base = '.'.join(segments[:-level])
                          candidates = [base + '.' + name if name else base]
                      for candidate in candidates:
                          if candidate in localmods:
                              return (candidate, candidate, False)
                          initpath = candidate + '.__init__'
                          if initpath in localmods:
                              return (candidate, initpath, True)
                      return False
                  return fromlocal
              def populateextmods(localmods):
                  """Populate C extension modules based on pure modules

                  Returns a new set holding everything in ``localmods`` plus, for each
                  ``mercurial.pure.X`` entry, ``mercurial.cext.X`` and
                  ``mercurial.cffi._X``. The input is not modified.
                  """
                  pureprefix = 'mercurial.pure.'
                  expanded = set(localmods)
                  for modname in localmods:
                      if not modname.startswith(pureprefix):
                          continue
                      tail = modname[len(pureprefix):]
                      expanded.update(('mercurial.cext.' + tail,
                                       'mercurial.cffi._' + tail))
                  return expanded
              def list_stdlib_modules():
                  """List the modules present in the stdlib.

                  This is a generator. It first yields the interpreter's builtin module
                  names and a fixed list of platform or version specific names, then
                  walks every ``sys.path`` entry located under a stdlib prefix and
                  yields the dotted name of each module file found there. The same
                  name can be yielded more than once (for example for both a ``.py``
                  and a ``.pyc`` file).

                  >>> py3 = sys.version_info[0] >= 3
                  >>> mods = set(list_stdlib_modules())
                  >>> 'BaseHTTPServer' in mods or py3
                  True
                  os.path isn't really a module, so it's missing:
                  >>> 'os.path' in mods
                  False
                  sys requires special treatment, because it's baked into the
                  interpreter, but it should still appear:
                  >>> 'sys' in mods
                  True
                  >>> 'collections' in mods
                  True
                  >>> 'cStringIO' in mods or py3
                  True
                  >>> 'cffi' in mods
                  True
                  """
                  for m in sys.builtin_module_names:
                      yield m
                  # These modules only exist on windows, but we should always
                  # consider them stdlib.
                  for m in ['msvcrt', '_winreg']:
                      yield m
                  yield '__builtin__'
                  yield 'builtins' # python3 only
                  yield 'importlib.abc' # python3 only
                  yield 'importlib.machinery' # python3 only
                  yield 'importlib.util' # python3 only
                  for m in 'fcntl', 'grp', 'pwd', 'termios':  # Unix only
                      yield m
                  for m in 'cPickle', 'datetime': # in Python (not C) on PyPy
                      yield m
                  # cffi is always reported as stdlib here (see the doctest above).
                  for m in ['cffi']:
                      yield m
                  stdlib_prefixes = {sys.prefix, sys.exec_prefix}
                  # We need to supplement the list of prefixes for the search to work
                  # when run from within a virtualenv.
                  for mod in (basehttpserver, zlib):
                      # basehttpserver is None when its import failed at module load.
                      if mod is None:
                          continue
                      try:
                          # Not all module objects have a __file__ attribute.
                          filename = mod.__file__
                      except AttributeError:
                          continue
                      dirname = os.path.dirname(filename)
                      for prefix in stdlib_prefixes:
                          if dirname.startswith(prefix):
                              # Then this directory is redundant.
                              break
                      else:
                          # No known prefix covers this directory, so add it.
                          stdlib_prefixes.add(dirname)
                  for libpath in sys.path:
                      # We want to walk everything in sys.path that starts with
                      # something in stdlib_prefixes.
                      if not any(libpath.startswith(p) for p in stdlib_prefixes):
                          continue
                      for top, dirs, files in os.walk(libpath):
                          # Prune in place so os.walk does not descend into directories
                          # that are not packages, or into Mercurial's own top-level
                          # packages. Iterating in reverse keeps the remaining indexes
                          # valid while deleting.
                          for i, d in reversed(list(enumerate(dirs))):
                              if (not os.path.exists(os.path.join(top, d, '__init__.py'))
                                  or top == libpath and d in ('hgdemandimport', 'hgext',
                                                              'mercurial')):
                                  del dirs[i]
                          for name in files:
                              if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
                                  continue
                              # A package is named after its directory, not __init__.
                              if name.startswith('__init__.py'):
                                  full_path = top
                              else:
                                  full_path = os.path.join(top, name)
                              # Strip the libpath prefix and the path separator after it.
                              rel_path = full_path[len(libpath) + 1:]
                              mod = dotted_name_of_path(rel_path)
                              yield mod
              stdlib_modules = set(list_stdlib_modules())
              def imported_modules(source, modulename, f, localmods, ignore_nested=False):
                  """Given the source of a file as a string, yield the names
                  imported by that file.

                  Args:
                    source: The python source to examine as a string.
                    modulename: of specified python source (may have `__init__`)
                    f: filename handed to ``ast.parse`` along with the source.
                    localmods: set of locally defined module names (may have `__init__`)
                    ignore_nested: If true, import statements that do not start in
                                   column zero will be ignored.

                  Returns:
                    A generator of absolute module names imported by the given source.
                    Only locally defined modules are reported.

                  >>> f = 'foo/xxx.py'
                  >>> modulename = 'foo.xxx'
                  >>> localmods = {'foo.__init__': True,
                  ...              'foo.foo1': True, 'foo.foo2': True,
                  ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
                  ...              'baz.__init__': True, 'baz.baz1': True }
                  >>> # standard library (= not locally defined ones)
                  >>> sorted(imported_modules(
                  ...        'from stdlib1 import foo, bar; import stdlib2',
                  ...        modulename, f, localmods))
                  []
                  >>> # relative importing
                  >>> sorted(imported_modules(
                  ...        'import foo1; from bar import bar1',
                  ...        modulename, f, localmods))
                  ['foo.bar.bar1', 'foo.foo1']
                  >>> sorted(imported_modules(
                  ...        'from bar.bar1 import name1, name2, name3',
                  ...        modulename, f, localmods))
                  ['foo.bar.bar1']
                  >>> # absolute importing
                  >>> sorted(imported_modules(
                  ...        'from baz import baz1, name1',
                  ...        modulename, f, localmods))
                  ['baz.__init__', 'baz.baz1']
                  >>> # mixed importing, even though it shouldn't be recommended
                  >>> sorted(imported_modules(
                  ...        'import stdlib, foo1, baz',
                  ...        modulename, f, localmods))
                  ['baz.__init__', 'foo.foo1']
                  >>> # ignore_nested
                  >>> sorted(imported_modules(
                  ... '''import foo
                  ... def wat():
                  ...     import bar
                  ... ''', modulename, f, localmods))
                  ['foo.__init__', 'foo.bar.__init__']
                  >>> sorted(imported_modules(
                  ... '''import foo
                  ... def wat():
                  ...     import bar
                  ... ''', modulename, f, localmods, ignore_nested=True))
                  ['foo.__init__']
                  """
                  resolve = fromlocalfunc(modulename, localmods)
                  for stmt in ast.walk(ast.parse(source, f)):
                      if ignore_nested and getattr(stmt, 'col_offset', 0) > 0:
                          continue
                      if isinstance(stmt, ast.Import):
                          for alias in stmt.names:
                              hit = resolve(alias.name)
                              # no hit: this should import standard library
                              if hit:
                                  yield hit[1]
                          continue
                      if not isinstance(stmt, ast.ImportFrom):
                          continue
                      hit = resolve(stmt.module, stmt.level)
                      if not hit:
                          # this should import standard library
                          continue
                      absname, dottedpath, hassubmod = hit
                      if not hassubmod:
                          # "dottedpath" is not a package; must be imported
                          # examination of "stmt.names" should be redundant
                          # e.g.: from mercurial.node import nullid, nullrev
                          yield dottedpath
                          continue
                      sawnonmodule = False
                      for alias in stmt.names:
                          sub = resolve(absname + '.' + alias.name)
                          if sub:
                              yield sub[1]
                          else:
                              # this should be a function or a property of "stmt.module"
                              sawnonmodule = True
                      if sawnonmodule:
                          # "dottedpath" is a package, but imported because of non-module
                          # lookup
                          yield dottedpath
              def verify_import_convention(module, source, localmods):
                  """Check ``source`` against the import convention that applies to it.

                  Two conventions exist: legacy and modern. Sources using absolute
                  imports are held to the (much more thorough) modern rules; all other
                  sources only get the legacy check for mixed imports.

                  Returns the iterable of ``(message, lineno)`` problems produced by
                  whichever checker was selected.
                  """
                  tree = ast.parse(source)
                  if not usingabsolute(tree):
                      return verify_stdlib_on_own_line(tree)
                  return verify_modern_convention(module, tree, localmods)
              def verify_modern_convention(module, root, localmods, root_col_offset=0):
                  """Verify a file conforms to the modern import convention rules.
                  The rules of the modern convention are:
                  * Ordering is stdlib followed by local imports. Each group is lexically
                    sorted.
                  * Importing multiple modules via "import X, Y" is not allowed: use
                    separate import statements.
                  * Importing multiple modules via "from X import ..." is allowed if using
                    parentheses and one entry per line.
                  * Only 1 relative import statement per import level ("from .", "from ..")
                    is allowed.
                  * Relative imports from higher levels must occur before lower levels. e.g.
                    "from .." must be before "from .".
                  * Imports from peer packages should use relative import (e.g. do not
                    "import mercurial.foo" from a "mercurial.*" module).
                  * Symbols can only be imported from specific modules (see
                    `allowsymbolimports`). For other modules, first import the module then
                    assign the symbol to a module-level variable. In addition, these imports
                    must be performed before other local imports. This rule only
                    applies to import statements outside of any blocks.
                  * Relative imports from the standard library are not allowed, unless that
                    library is also a local module.
                  * Certain modules must be aliased to alternate names to avoid aliasing
                    and readability problems. See `requirealias`.

                  Arguments:
                  * ``module`` is the dotted name of the module being checked; a
                    non-str value is decoded as ASCII first.
                  * ``root`` is the AST node whose local statements are examined.
                  * ``localmods`` is the collection of local module names, passed on to
                    ``fromlocalfunc`` and consulted for the stdlib check.
                  * ``root_col_offset`` is the column at which statements count as
                    being directly in this scope; the ordering and grouping rules are
                    only enforced for imports at exactly that column.

                  Yields ``(message, lineno)`` tuples, one per violation found.
                  """
                  if not isinstance(module, str):
                      module = module.decode('ascii')
                  topmodule = module.split('.')[0]
                  fromlocal = fromlocalfunc(module, localmods)
                  # Whether a local/non-stdlib import has been performed.
                  seenlocal = None
                  # Whether a local/non-stdlib, non-symbol import has been seen.
                  seennonsymbollocal = False
                  # The last name to be imported (for sorting).
                  lastname = None
                  # Whether the last "import X" statement named a stdlib module; sorting
                  # is only compared between imports of the same kind.
                  laststdlib = None
                  # Relative import levels encountered so far.
                  seenlevels = set()
                  for node, newscope in walklocal(root):
                      # Build a (message, lineno) result for the node being examined.
                      def msg(fmt, *args):
                          return (fmt % args, node.lineno)
                      if newscope:
                          # Check for local imports in function
                          # The nested scope's statements are expected one 4-space
                          # indent level deeper than the node that opens it.
                          for r in verify_modern_convention(module, node, localmods,
                                                            node.col_offset + 4):
                              yield r
                      elif isinstance(node, ast.Import):
                          # Disallow "import foo, bar" and require separate imports
                          # for each module.
                          if len(node.names) > 1:
                              yield msg('multiple imported names: %s',
                                        ', '.join(n.name for n in node.names))
                          # Only the first name takes part in the remaining checks.
                          name = node.names[0].name
                          asname = node.names[0].asname
                          stdlib = name in stdlib_modules
                          # Ignore sorting rules on imports inside blocks.
                          if node.col_offset == root_col_offset:
                              if lastname and name < lastname and laststdlib == stdlib:
                                  yield msg('imports not lexically sorted: %s < %s',
                                            name, lastname)
                          # Remember this import for comparison with the next one.
                          lastname = name
                          laststdlib = stdlib
                          # stdlib imports should be before local imports.
                          if stdlib and seenlocal and node.col_offset == root_col_offset:
                              yield msg('stdlib import "%s" follows local import: %s',
                                        name, seenlocal)
                          if not stdlib:
                              seenlocal = name
                          # Import of sibling modules should use relative imports.
                          topname = name.split('.')[0]
                          if topname == topmodule:
                              yield msg('import should be relative: %s', name)
                          if name in requirealias and asname != requirealias[name]:
                              yield msg('%s module must be "as" aliased to %s',
                                        name, requirealias[name])
                      elif isinstance(node, ast.ImportFrom):
                          # Resolve the full imported module name.
                          if node.level > 0:
                              # Drop node.level trailing components of the current
                              # module name, then append the imported module (if any).
                              fullname = '.'.join(module.split('.')[:-node.level])
                              if node.module:
                                  fullname += '.%s' % node.module
                          else:
                              assert node.module
                              fullname = node.module
                              topname = fullname.split('.')[0]
                              if topname == topmodule:
                                  yield msg('import should be relative: %s', fullname)
                          # __future__ is special since it needs to come first and use
                          # symbol import.
                          if fullname != '__future__':
                              # An empty name, or a stdlib name that is not also a local
                              # module, is reported; anything else counts as local.
                              if not fullname or (
                                  fullname in stdlib_modules
                                  and fullname not in localmods
                                  and fullname + '.__init__' not in localmods):
                                  yield msg('relative import of stdlib module')
                              else:
                                  seenlocal = fullname
                          # Direct symbol import is only allowed from certain modules and
                          # must occur before non-symbol imports.
                          found = fromlocal(node.module, node.level)
                          if found and found[2]:  # node.module is a package
                              # Names that resolve to local modules under the package
                              # are module imports, not symbols.
                              prefix = found[0] + '.'
                              symbols = (n.name for n in node.names
                                         if not fromlocal(prefix + n.name))
                          else:
                              symbols = (n.name for n in node.names)
                          # Names listed in directsymbols are exempt from these checks.
                          symbols = [sym for sym in symbols if sym not in directsymbols]
                          if node.module and node.col_offset == root_col_offset:
                              if symbols and fullname not in allowsymbolimports:
                                  yield msg('direct symbol import %s from %s',
                                            ', '.join(symbols), fullname)
                              if symbols and seennonsymbollocal:
                                  yield msg('symbol import follows non-symbol import: %s',
                                            fullname)
                          if not symbols and fullname not in stdlib_modules:
                              seennonsymbollocal = True
                          # "from . import X" style statements (no module part).
                          if not node.module:
                              assert node.level
                              # Only allow 1 group per level.
                              if (node.level in seenlevels
                                  and node.col_offset == root_col_offset):
                                  yield msg('multiple "from %s import" statements',
                                            '.' * node.level)
                              # Higher-level groups come before lower-level groups.
                              if any(node.level > l for l in seenlevels):
                                  yield msg('higher-level import should come first: %s',
                                            fullname)
                              seenlevels.add(node.level)
                          # Entries in "from .X import ( ... )" lists must be lexically
                          # sorted.
                          lastentryname = None
                          for n in node.names:
                              if lastentryname and n.name < lastentryname:
                                  yield msg('imports from %s not lexically sorted: %s < %s',
                                            fullname, n.name, lastentryname)
                              lastentryname = n.name
                              if n.name in requirealias and n.asname != requirealias[n.name]:
                                  yield msg('%s from %s must be "as" aliased to %s',
                                            n.name, fullname, requirealias[n.name])
              def verify_stdlib_on_own_line(root):
                  """Report import statements that mix stdlib and local modules.

                  Walks every ``import a, b`` statement under ``root`` and yields a
                  ``(message, lineno)`` tuple for each one naming at least one stdlib
                  module together with at least one non-stdlib module.

                  >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
                  [('mixed imports\\n   stdlib:    sys\\n   relative:  foo', 1)]
                  >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
                  []
                  >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
                  []
                  """
                  for stmt in ast.walk(root):
                      if not isinstance(stmt, ast.Import):
                          continue
                      stdlib_names = []
                      other_names = []
                      for alias in stmt.names:
                          if alias.name in stdlib_modules:
                              stdlib_names.append(alias.name)
                          else:
                              other_names.append(alias.name)
                      # Only statements mixing both kinds are a problem.
                      if stdlib_names and other_names:
                          stdlib_part = ', '.join(sorted(stdlib_names))
                          other_part = ', '.join(sorted(other_names))
                          yield ('mixed imports\n   stdlib:    %s\n   relative:  %s' %
                                 (stdlib_part, other_part), stmt.lineno)
              class CircularImport(Exception):
                  """Raised by checkmod() with the import path that leads back to its start."""
              def checkmod(mod, imports):
                  """Search the import graph breadth-first for a cycle through ``mod``.

                  ``imports`` maps a module name to the names it imports. Raises
                  CircularImport carrying the path (starting at ``mod``) whose last
                  module imports ``mod`` again; returns None when no such path exists.
                  """
                  best = {}
                  pending = [[mod]]
                  while pending:
                      chain = pending.pop(0)
                      depth = len(chain)
                      for dep in sorted(imports.get(chain[-1], [])):
                          # Skip modules already reached by a path at least as short.
                          if depth >= best.get(dep, 1000):
                              continue
                          best[dep] = depth
                          if dep not in chain:
                              pending.append(chain + [dep])
                          elif dep == chain[0]:
                              raise CircularImport(chain)
              def rotatecycle(cycle):
                  """Rotate a cycle so its lexicographically smallest module comes first.

                  The smallest module is also appended at the end to close the loop.

                  >>> rotatecycle(['foo', 'bar'])
                  ['bar', 'foo', 'bar']
                  """
                  first = min(cycle)
                  split = cycle.index(first)
                  head, tail = cycle[:split], cycle[split:]
                  return tail + head + [first]
              def find_cycles(imports):
                  """Find cycles in an already-loaded import graph.

                  All module names recorded in `imports` should be absolute one.
                  Returns a set of strings, each a cycle rendered as
                  ``a -> b -> ... -> a`` and rotated by rotatecycle().

                  >>> from __future__ import print_function
                  >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
                  ...            'top.bar': ['top.baz', 'sys'],
                  ...            'top.baz': ['top.foo'],
                  ...            'top.qux': ['top.foo']}
                  >>> print('\\n'.join(sorted(find_cycles(imports))))
                  top.bar -> top.baz -> top.foo -> top.bar
                  top.foo -> top.qux -> top.foo
                  """
                  found = set()
                  for name in sorted(imports):
                      try:
                          checkmod(name, imports)
                      except CircularImport as exc:
                          found.add(" -> ".join(rotatecycle(exc.args[0])))
                  return found
              def _cycle_sortkey(c):
                  return len(c), c
              def embedded(f, modname, src):
                  """Extract embedded python code

                  ``src`` is an iterable of bytes lines. Two kinds of embedded Python
                  are recognized:

                  * inline blocks started by ``  >>> `` lines and continued by
                    ``  ... `` lines; and
                  * heredocs of the form ``  $ cat > NAME.py << EOF`` whose body lines
                    start with ``  > `` and which end at ``  > EOF``.

                  Yields ``(script, modulename, filename, linenumber)`` tuples, where
                  ``modulename`` is ``modname[linenumber]``, ``filename`` is
                  ``doctest.py`` for inline blocks or the heredoc target, and
                  ``linenumber`` is the 1-based line on which the block started.

                  An inline block ends at an empty ``  `` line, at the next ``  $ ``
                  command, at a ``cat`` heredoc, or at the end of ``src``. ``f`` is only
                  used in the "Parse Error" diagnostic.

                  >>> def _forcestr(thing):
                  ...     if not isinstance(thing, str):
                  ...         return thing.decode('ascii')
                  ...     return thing
                  >>> def test(fn, lines):
                  ...     for s, m, f, l in embedded(fn, b"example", lines):
                  ...         print("%s %s %d" % (_forcestr(m), _forcestr(f), l))
                  ...         print(repr(_forcestr(s)))
                  >>> lines = [
                  ...   b'comment',
                  ...   b'  >>> from __future__ import print_function',
                  ...   b"  >>> ' multiline",
                  ...   b"  ... string'",
                  ...   b'  ',
                  ...   b'comment',
                  ...   b'  $ cat > foo.py <<EOF',
                  ...   b'  > from __future__ import print_function',
                  ...   b'  > EOF',
                  ... ]
                  >>> test(b"example.t", lines)
                  example[2] doctest.py 2
                  "from __future__ import print_function\\n' multiline\\nstring'\\n"
                  example[7] foo.py 7
                  'from __future__ import print_function\\n'
                  >>> test(b"example.t", [b'  >>> import os'])
                  example[1] doctest.py 1
                  'import os\\n'
                  """
                  # Line number on which the current inline block / heredoc started
                  # (0 when not inside one).
                  inlinepython = 0
                  shpython = 0
                  script = []
                  # Length of the "  >>> " / "  ... " markers stripped from inline lines.
                  prefix = 6
                  t = ''
                  catre = re.compile(br"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF")
                  for n, l in enumerate(src, 1):
                      if not l.endswith(b'\n'):
                          l += b'\n'
                      if l.startswith(b'  >>> '): # python inlines
                          if shpython:
                              print("%s:%d: Parse Error" % (f, n))
                          if not inlinepython:
                              # We've just entered a Python block.
                              inlinepython = n
                              t = b'doctest.py'
                          script.append(l[prefix:])
                          continue
                      if l.startswith(b'  ... '): # python inlines
                          script.append(l[prefix:])
                          continue
                      cat = catre.search(l)
                      if cat:
                          # A heredoc implicitly terminates a pending inline block.
                          if inlinepython:
                              yield b''.join(script), (b"%s[%d]" %
                                     (modname, inlinepython)), t, inlinepython
                              script = []
                              inlinepython = 0
                          shpython = n
                          t = cat.group(1)
                          continue
                      if shpython and l.startswith(b'  > '): # sh continuation
                          if l == b'  > EOF\n':
                              yield b''.join(script), (b"%s[%d]" %
                                     (modname, shpython)), t, shpython
                              script = []
                              shpython = 0
                          else:
                              script.append(l[4:])
                          continue
                      # If we have an empty line or a command for sh, we end the
                      # inline script.
                      if inlinepython and (l == b'  \n'
                                           or l.startswith(b'  $ ')):
                          yield b''.join(script), (b"%s[%d]" %
                                 (modname, inlinepython)), t, inlinepython
                          script = []
                          inlinepython = 0
                          continue
                  # An inline block that runs to the end of the input has no
                  # terminating line; emit it here so it is not silently dropped.
                  if inlinepython:
                      yield b''.join(script), (b"%s[%d]" %
                             (modname, inlinepython)), t, inlinepython
              def sources(f, modname):
                  """Yields possibly multiple sources from a filepath

                  input: filepath, modulename
                  yields:  script(string), modulename, filepath, linenumber

                  Files not ending in ``.t`` are first yielded whole with line number
                  0. Every file is then scanned by embedded() for embedded scripts.
                  For embedded scripts, the modulename and filepath will be different
                  from the function arguments. linenumber is an offset relative to
                  the input file.
                  """
                  if not f.endswith('.t'):
                      with open(f, 'rb') as fh:
                          yield fh.read(), modname, f, 0
                  # Both ``.t`` files and plain sources go through the embedded scan.
                  with open(f, 'rb') as fh:
                      for script, embname, embfile, lineno in embedded(f, modname, fh):
                          yield script, embname, embfile, lineno
              def main(argv):
                  """Check import conventions and import cycles for the given files.

                  ``argv`` is ``sys.argv``-style: ``argv[0]`` is the program name and
                  the remaining entries are file paths, or a single ``-`` to read the
                  paths from stdin, one per line.

                  Prints one line per convention violation, syntax error and reported
                  import cycle. Returns 1 on a usage error; otherwise True if any
                  convention violation or import cycle was found, else False.
                  """
                  if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
                      # Interpolate the program name; fall back to sys.argv[0] when the
                      # caller passed an empty argument list.
                      prog = argv[0] if argv else sys.argv[0]
                      print('Usage: %s {-|file [file] [file] ...}' % prog)
                      return 1
                  if argv[1] == '-':
                      # Copy before extending so the caller's list is not modified.
                      argv = argv[:1]
                      argv.extend(l.rstrip() for l in sys.stdin.readlines())
                  localmodpaths = {}
                  used_imports = {}
                  any_errors = False
                  for source_path in argv[1:]:
                      modname = dotted_name_of_path(source_path)
                      localmodpaths[modname] = source_path
                  localmods = populateextmods(localmodpaths)
                  for localmodname, source_path in sorted(localmodpaths.items()):
                      if not isinstance(localmodname, bytes):
                          # This is only safe because all hg's files are ascii
                          localmodname = localmodname.encode('ascii')
                      for src, modname, name, line in sources(source_path, localmodname):
                          try:
                              used_imports[modname] = sorted(
                                  imported_modules(src, modname, name, localmods,
                                                   ignore_nested=True))
                              for error, lineno in verify_import_convention(modname, src,
                                                                            localmods):
                                  any_errors = True
                                  print('%s:%d: %s' % (source_path, lineno + line, error))
                          except SyntaxError as e:
                              # Reported but not counted as an error.
                              print('%s:%d: SyntaxError: %s' %
                                    (source_path, e.lineno + line, e))
                  cycles = find_cycles(used_imports)
                  if cycles:
                      firstmods = set()
                      for c in sorted(cycles, key=_cycle_sortkey):
                          first = c.split()[0]
                          # As a rough cut, ignore any cycle that starts with the
                          # same module as some other cycle. Otherwise we see lots
                          # of cycles that are effectively duplicates.
                          if first in firstmods:
                              continue
                          print('Import cycle:', c)
                          firstmods.add(first)
                      any_errors = True
                  return any_errors != 0
              if __name__ == '__main__':
                  # Exit with main()'s result coerced to an integer status.
                  exit_status = int(main(sys.argv))
                  sys.exit(exit_status)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages