upstream/mercurial-mirror Commit - r45678:d0ef8c1d

manifest: tigher manifest parsing and flag use...

Joerg Sonnenberger -

r45678:d0ef8c1d default

parent child

mercurial/cext/manifest.c

0 +41 -33

              }
              /* get the node value of a single line */
-             static PyObject *nodeof(line *l)
+             static PyObject *nodeof(line *l, char *flag)
              {
              	char *s = l->start;
              	Py_ssize_t llen = pathlen(l);
              	Py_ssize_t hlen = l->len - llen - 2;
-             	Py_ssize_t hlen_raw = 20;
+             	Py_ssize_t hlen_raw;
              	PyObject *hash;
              	if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
              		PyErr_SetString(PyExc_ValueError, "manifest line too short");
              		return NULL;
              	}
+             	/* Detect flags after the hash first. */
+             	switch (s[llen + hlen]) {
+             	case 'l':
+             	case 't':
+             	case 'x':
+             		*flag = s[llen + hlen];
+             		--hlen;
+             		break;
+             	default:
+             		*flag = '\0';
+             		break;
+             	}
              	switch (hlen) {
              	case 40: /* sha1 */
-             	case 41: /* sha1 with cruft for a merge */
+             		hlen_raw = 20;
              		break;
              	case 64: /* new hash */
-             	case 65: /* new hash with cruft for a merge */
              		hlen_raw = 32;
              		break;
              	default:
              /* get the node hash and flags of a line as a tuple */
              static PyObject *hashflags(line *l)
              {
-             	char *s = l->start;
-             	Py_ssize_t plen = pathlen(l);
-             	PyObject *hash = nodeof(l);
+             	char flag;
+             	PyObject *hash = nodeof(l, &flag);
              	ssize_t hlen;
              	Py_ssize_t hplen, flen;
              	PyObject *flags;
              	if (!hash)
              		return NULL;
-             	/* hash is either 20 or 21 bytes for an old hash, so we use a
-             	   ternary here to get the "real" hexlified sha length. */
-             	hlen = PyBytes_GET_SIZE(hash) < 22 ? 40 : 64;
-             	/* 1 for null byte, 1 for newline */
-             	hplen = plen + hlen + 2;
-             	flen = l->len - hplen;
-             	flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
+             	flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
              	if (!flags) {
              		Py_DECREF(hash);
              		return NULL;
              {
              	Py_ssize_t pl;
              	line *l;
+             	char flag;
              	Py_ssize_t consumed;
              	PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
              	l = lmiter_nextline((lmIter *)o);
              	}
              	pl = pathlen(l);
              	path = PyBytes_FromStringAndSize(l->start, pl);
-             	hash = nodeof(l);
+             	hash = nodeof(l, &flag);
              	if (!path || !hash) {
              		goto done;
              	}
-             	consumed = pl + 41;
-             	flags = PyBytes_FromStringAndSize(l->start + consumed,
-             					   l->len - consumed - 1);
+             	flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
              	if (!flags) {
              		goto done;
              	}
              	pyhash = PyTuple_GetItem(value, 0);
              	if (!PyBytes_Check(pyhash)) {
              		PyErr_Format(PyExc_TypeError,
-             			     "node must be a 20-byte string");
+             			     "node must be a 20 or 32 bytes string");
              		return -1;
              	}
              	hlen = PyBytes_Size(pyhash);
-             	/* Some parts of the codebase try and set 21 or 22
-             	 * byte "hash" values in order to perturb things for
-             	 * status. We have to preserve at least the 21st
-             	 * byte. Sigh. If there's a 22nd byte, we drop it on
-             	 * the floor, which works fine.
-             	 */
-             	if (hlen != 20 && hlen != 21 && hlen != 22) {
+             	if (hlen != 20 && hlen != 32) {
              		PyErr_Format(PyExc_TypeError,
-             			     "node must be a 20-byte string");
+             			     "node must be a 20 or 32 bytes string");
              		return -1;
              	}
              	hash = PyBytes_AsString(pyhash);
              	pyflags = PyTuple_GetItem(value, 1);
              	if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
              		PyErr_Format(PyExc_TypeError,
-             			     "flags must a 0 or 1 byte string");
+             			     "flags must a 0 or 1 bytes string");
              		return -1;
              	}
              	if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
              		return -1;
              	}
+             	if (flen == 1) {
+             		switch (*flags) {
+             		case 'l':
+             		case 't':
+             		case 'x':
+             			break;
+             		default:
+             			PyErr_Format(PyExc_TypeError, "invalid manifest flag");
+             			return -1;
+             		}
+             	}
              	/* one null byte and one newline */
-             	dlen = plen + 41 + flen + 1;
+             	dlen = plen + hlen * 2 + 1 + flen + 1;
              	dest = malloc(dlen);
              	if (!dest) {
              		PyErr_NoMemory();
              		return -1;
              	}
              	memcpy(dest, path, plen + 1);
-             	for (i = 0; i < 20; i++) {
+             	for (i = 0; i < hlen; i++) {
              		/* Cast to unsigned, so it will not get sign-extended when promoted
              		 * to int (as is done when passing to a variadic function)
              		 */
              		sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
              	}
-             	memcpy(dest + plen + 41, flags, flen);
-             	dest[plen + 41 + flen] = '\n';
+             	memcpy(dest + plen + 2 * hlen + 1, flags, flen);
+             	dest[plen + 2 * hlen + 1 + flen] = '\n';
              	new.start = dest;
              	new.len = dlen;
              	new.hash_suffix = '\0';

mercurial/manifest.py

0 +34 -23

                          self.pos += 1
                          return data
                      zeropos = data.find(b'\x00', pos)
-                     hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
-                     flags = self.lm._getflags(data, self.pos, zeropos)
+                     nlpos = data.find(b'\n', pos)
+                     if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
+                         raise error.StorageError(b'Invalid manifest line')
+                     flags = data[nlpos - 1 : nlpos]
+                     if flags in _manifestflags:
+                         hlen = nlpos - zeropos - 2
+                     else:
+                         hlen = nlpos - zeropos - 1
+                         flags = b''
+                     if hlen not in (40, 64):
+                         raise error.StorageError(b'Invalid manifest line')
+                     hashval = unhexlify(
+                         data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
+                     )
                      self.pos += 1
                      return (data[pos:zeropos], hashval, flags)
                  return (a > b) - (a < b)
+             _manifestflags = {b'', b'l', b't', b'x'}
              class _lazymanifest(object):
                  """A pure python manifest backed by a byte string.  It is supplimented with
                  internal lists as it is modified, until it is compacted back to a pure byte
                  def __contains__(self, key):
                      return self.bsearch(key) != -1
-                 def _getflags(self, data, needle, pos):
-                     start = pos + 41
-                     end = data.find(b"\n", start)
-                     if end == -1:
-                         end = len(data) - 1
-                     if start == end:
-                         return b''
-                     return self.data[start:end]
                  def __getitem__(self, key):
                      if not isinstance(key, bytes):
                          raise TypeError(b"getitem: manifest keys must be a bytes.")
                      nlpos = data.find(b'\n', zeropos)
                      assert 0 <= needle <= len(self.positions)
                      assert len(self.extrainfo) == len(self.positions)
+                     if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
+                         raise error.StorageError(b'Invalid manifest line')
                      hlen = nlpos - zeropos - 1
-                     # Hashes sometimes have an extra byte tucked on the end, so
-                     # detect that.
-                     if hlen % 2:
+                     flags = data[nlpos - 1 : nlpos]
+                     if flags in _manifestflags:
                          hlen -= 1
+                     else:
+                         flags = b''
+                     if hlen not in (40, 64):
+                         raise error.StorageError(b'Invalid manifest line')
                      hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
-                     flags = self._getflags(data, needle, zeropos)
                      return (hashval, flags)
                  def __delitem__(self, key):
                  def _pack(self, d):
                      n = d[1]
-                     if len(n) == 21 or len(n) == 33:
-                         n = n[:-1]
-                     assert len(n) == 20 or len(n) == 32
+                     assert len(n) in (20, 32)
                      return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
                  def text(self):
                      return self._lm.diff(m2._lm, clean)
                  def setflag(self, key, flag):
+                     if flag not in _manifestflags:
+                         raise TypeError(b"Invalid manifest flag set.")
                      self._lm[key] = self[key], flag
                  def get(self, key, default=None):
                          self._dirs[dir].__setitem__(subpath, n)
                      else:
                          # manifest nodes are either 20 bytes or 32 bytes,
-                         # depending on the hash in use. An extra byte is
-                         # occasionally used by hg, but won't ever be
-                         # persisted. Trim to 21 or 33 bytes as appropriate.
-                         trim = 21 if len(n) < 25 else 33
-                         self._files[f] = n[:trim]  # to match manifestdict's behavior
+                         # depending on the hash in use. Assert this as historically
+                         # sometimes extra bytes were added.
+                         assert len(n) in (20, 32)
+                         self._files[f] = n
                      self._dirty = True
                  def _load(self):
                  def setflag(self, f, flags):
                      """Set the flags (symlink, executable) for path f."""
+                     if flags not in _manifestflags:
+                         raise TypeError(b"Invalid manifest flag set.")
                      self._load()
                      dir, subpath = _splittopdir(f)
                      if dir:

mercurial/merge.py

0 +1 -2

                                          b'prompt changed/deleted',
                                      )
                              elif n1 == addednodeid:
-                                 # This extra 'a' is added by working copy manifest to mark
-                                 # the file as locally added. We should forget it instead of
+                                 # This file was locally added. We should forget it instead of
                                  # deleting it.
                                  actions[f] = (
                                      mergestatemod.ACTION_FORGET,

tests/test-manifest.py

0 0 -33

		@@ -156,39 +156,6 b' class basemanifesttests(object):'
156	156	with self.assertRaises(KeyError):
157	157	m[b'foo']
158	158
159		def testSetGetNodeSuffix(self):
160		clean = self.parsemanifest(A_SHORT_MANIFEST)
161		m = self.parsemanifest(A_SHORT_MANIFEST)
162		h = m[b'foo']
163		f = m.flags(b'foo')
164		want = h + b'a'
165		# Merge code wants to set 21-byte fake hashes at times
166		m[b'foo'] = want
167		self.assertEqual(want, m[b'foo'])
168		self.assertEqual(
169		[(b'bar/baz/qux.py', BIN_HASH_2), (b'foo', BIN_HASH_1 + b'a')],
170		list(m.items()),
171		)
172		# Sometimes it even tries a 22-byte fake hash, but we can
173		# return 21 and it'll work out
174		m[b'foo'] = want + b'+'
175		self.assertEqual(want, m[b'foo'])
176		# make sure the suffix survives a copy
177		match = matchmod.match(util.localpath(b'/repo'), b'', [b're:foo'])
178		m2 = m._matches(match)
179		self.assertEqual(want, m2[b'foo'])
180		self.assertEqual(1, len(m2))
181		m2 = m.copy()
182		self.assertEqual(want, m2[b'foo'])
183		# suffix with iteration
184		self.assertEqual(
185		[(b'bar/baz/qux.py', BIN_HASH_2), (b'foo', want)], list(m.items())
186		)
187
188		# shows up in diff
189		self.assertEqual({b'foo': ((want, f), (h, b''))}, m.diff(clean))
190		self.assertEqual({b'foo': ((h, b''), (want, f))}, clean.diff(m))
191
192	159	def testMatchException(self):
193	160	m = self.parsemanifest(A_SHORT_MANIFEST)
194	161	match = matchmod.match(util.localpath(b'/repo'), b'', [b're:.*'])

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages