##// END OF EJS Templates
manifest: tighter manifest parsing and flag use...
Joerg Sonnenberger -
r45678:d0ef8c1d default
parent child Browse files
Show More
@@ -49,23 +49,35 b' static Py_ssize_t pathlen(line *l)'
49 }
49 }
50
50
51 /* get the node value of a single line */
51 /* get the node value of a single line */
52 static PyObject *nodeof(line *l)
52 static PyObject *nodeof(line *l, char *flag)
53 {
53 {
54 char *s = l->start;
54 char *s = l->start;
55 Py_ssize_t llen = pathlen(l);
55 Py_ssize_t llen = pathlen(l);
56 Py_ssize_t hlen = l->len - llen - 2;
56 Py_ssize_t hlen = l->len - llen - 2;
57 Py_ssize_t hlen_raw = 20;
57 Py_ssize_t hlen_raw;
58 PyObject *hash;
58 PyObject *hash;
59 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
59 if (llen + 1 + 40 + 1 > l->len) { /* path '\0' hash '\n' */
60 PyErr_SetString(PyExc_ValueError, "manifest line too short");
60 PyErr_SetString(PyExc_ValueError, "manifest line too short");
61 return NULL;
61 return NULL;
62 }
62 }
63 /* Detect flags after the hash first. */
64 switch (s[llen + hlen]) {
65 case 'l':
66 case 't':
67 case 'x':
68 *flag = s[llen + hlen];
69 --hlen;
70 break;
71 default:
72 *flag = '\0';
73 break;
74 }
75
63 switch (hlen) {
76 switch (hlen) {
64 case 40: /* sha1 */
77 case 40: /* sha1 */
65 case 41: /* sha1 with cruft for a merge */
78 hlen_raw = 20;
66 break;
79 break;
67 case 64: /* new hash */
80 case 64: /* new hash */
68 case 65: /* new hash with cruft for a merge */
69 hlen_raw = 32;
81 hlen_raw = 32;
70 break;
82 break;
71 default:
83 default:
@@ -89,9 +101,8 b' static PyObject *nodeof(line *l)'
89 /* get the node hash and flags of a line as a tuple */
101 /* get the node hash and flags of a line as a tuple */
90 static PyObject *hashflags(line *l)
102 static PyObject *hashflags(line *l)
91 {
103 {
92 char *s = l->start;
104 char flag;
93 Py_ssize_t plen = pathlen(l);
105 PyObject *hash = nodeof(l, &flag);
94 PyObject *hash = nodeof(l);
95 ssize_t hlen;
106 ssize_t hlen;
96 Py_ssize_t hplen, flen;
107 Py_ssize_t hplen, flen;
97 PyObject *flags;
108 PyObject *flags;
@@ -99,14 +110,7 b' static PyObject *hashflags(line *l)'
99
110
100 if (!hash)
111 if (!hash)
101 return NULL;
112 return NULL;
102 /* hash is either 20 or 21 bytes for an old hash, so we use a
113 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
103 ternary here to get the "real" hexlified sha length. */
104 hlen = PyBytes_GET_SIZE(hash) < 22 ? 40 : 64;
105 /* 1 for null byte, 1 for newline */
106 hplen = plen + hlen + 2;
107 flen = l->len - hplen;
108
109 flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
110 if (!flags) {
114 if (!flags) {
111 Py_DECREF(hash);
115 Py_DECREF(hash);
112 return NULL;
116 return NULL;
@@ -291,6 +295,7 b' static PyObject *lmiter_iterentriesnext('
291 {
295 {
292 Py_ssize_t pl;
296 Py_ssize_t pl;
293 line *l;
297 line *l;
298 char flag;
294 Py_ssize_t consumed;
299 Py_ssize_t consumed;
295 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
300 PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
296 l = lmiter_nextline((lmIter *)o);
301 l = lmiter_nextline((lmIter *)o);
@@ -299,13 +304,11 b' static PyObject *lmiter_iterentriesnext('
299 }
304 }
300 pl = pathlen(l);
305 pl = pathlen(l);
301 path = PyBytes_FromStringAndSize(l->start, pl);
306 path = PyBytes_FromStringAndSize(l->start, pl);
302 hash = nodeof(l);
307 hash = nodeof(l, &flag);
303 if (!path || !hash) {
308 if (!path || !hash) {
304 goto done;
309 goto done;
305 }
310 }
306 consumed = pl + 41;
311 flags = PyBytes_FromStringAndSize(&flag, flag ? 1 : 0);
307 flags = PyBytes_FromStringAndSize(l->start + consumed,
308 l->len - consumed - 1);
309 if (!flags) {
312 if (!flags) {
310 goto done;
313 goto done;
311 }
314 }
@@ -568,19 +571,13 b' static int lazymanifest_setitem('
568 pyhash = PyTuple_GetItem(value, 0);
571 pyhash = PyTuple_GetItem(value, 0);
569 if (!PyBytes_Check(pyhash)) {
572 if (!PyBytes_Check(pyhash)) {
570 PyErr_Format(PyExc_TypeError,
573 PyErr_Format(PyExc_TypeError,
571 "node must be a 20-byte string");
574 "node must be a 20 or 32 bytes string");
572 return -1;
575 return -1;
573 }
576 }
574 hlen = PyBytes_Size(pyhash);
577 hlen = PyBytes_Size(pyhash);
575 /* Some parts of the codebase try and set 21 or 22
578 if (hlen != 20 && hlen != 32) {
576 * byte "hash" values in order to perturb things for
577 * status. We have to preserve at least the 21st
578 * byte. Sigh. If there's a 22nd byte, we drop it on
579 * the floor, which works fine.
580 */
581 if (hlen != 20 && hlen != 21 && hlen != 22) {
582 PyErr_Format(PyExc_TypeError,
579 PyErr_Format(PyExc_TypeError,
583 "node must be a 20-byte string");
580 "node must be a 20 or 32 bytes string");
584 return -1;
581 return -1;
585 }
582 }
586 hash = PyBytes_AsString(pyhash);
583 hash = PyBytes_AsString(pyhash);
@@ -588,28 +585,39 b' static int lazymanifest_setitem('
588 pyflags = PyTuple_GetItem(value, 1);
585 pyflags = PyTuple_GetItem(value, 1);
589 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
586 if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
590 PyErr_Format(PyExc_TypeError,
587 PyErr_Format(PyExc_TypeError,
591 "flags must a 0 or 1 byte string");
588 "flags must a 0 or 1 bytes string");
592 return -1;
589 return -1;
593 }
590 }
594 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
591 if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
595 return -1;
592 return -1;
596 }
593 }
594 if (flen == 1) {
595 switch (*flags) {
596 case 'l':
597 case 't':
598 case 'x':
599 break;
600 default:
601 PyErr_Format(PyExc_TypeError, "invalid manifest flag");
602 return -1;
603 }
604 }
597 /* one null byte and one newline */
605 /* one null byte and one newline */
598 dlen = plen + 41 + flen + 1;
606 dlen = plen + hlen * 2 + 1 + flen + 1;
599 dest = malloc(dlen);
607 dest = malloc(dlen);
600 if (!dest) {
608 if (!dest) {
601 PyErr_NoMemory();
609 PyErr_NoMemory();
602 return -1;
610 return -1;
603 }
611 }
604 memcpy(dest, path, plen + 1);
612 memcpy(dest, path, plen + 1);
605 for (i = 0; i < 20; i++) {
613 for (i = 0; i < hlen; i++) {
606 /* Cast to unsigned, so it will not get sign-extended when promoted
614 /* Cast to unsigned, so it will not get sign-extended when promoted
607 * to int (as is done when passing to a variadic function)
615 * to int (as is done when passing to a variadic function)
608 */
616 */
609 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
617 sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
610 }
618 }
611 memcpy(dest + plen + 41, flags, flen);
619 memcpy(dest + plen + 2 * hlen + 1, flags, flen);
612 dest[plen + 41 + flen] = '\n';
620 dest[plen + 2 * hlen + 1 + flen] = '\n';
613 new.start = dest;
621 new.start = dest;
614 new.len = dlen;
622 new.len = dlen;
615 new.hash_suffix = '\0';
623 new.hash_suffix = '\0';
@@ -121,8 +121,20 b' class lazymanifestiterentries(object):'
121 self.pos += 1
121 self.pos += 1
122 return data
122 return data
123 zeropos = data.find(b'\x00', pos)
123 zeropos = data.find(b'\x00', pos)
124 hashval = unhexlify(data, self.lm.extrainfo[self.pos], zeropos + 1, 40)
124 nlpos = data.find(b'\n', pos)
125 flags = self.lm._getflags(data, self.pos, zeropos)
125 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
126 raise error.StorageError(b'Invalid manifest line')
127 flags = data[nlpos - 1 : nlpos]
128 if flags in _manifestflags:
129 hlen = nlpos - zeropos - 2
130 else:
131 hlen = nlpos - zeropos - 1
132 flags = b''
133 if hlen not in (40, 64):
134 raise error.StorageError(b'Invalid manifest line')
135 hashval = unhexlify(
136 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
137 )
126 self.pos += 1
138 self.pos += 1
127 return (data[pos:zeropos], hashval, flags)
139 return (data[pos:zeropos], hashval, flags)
128
140
@@ -140,6 +152,9 b' def _cmp(a, b):'
140 return (a > b) - (a < b)
152 return (a > b) - (a < b)
141
153
142
154
155 _manifestflags = {b'', b'l', b't', b'x'}
156
157
143 class _lazymanifest(object):
158 class _lazymanifest(object):
144 """A pure python manifest backed by a byte string. It is supplimented with
159 """A pure python manifest backed by a byte string. It is supplimented with
145 internal lists as it is modified, until it is compacted back to a pure byte
160 internal lists as it is modified, until it is compacted back to a pure byte
@@ -251,15 +266,6 b' class _lazymanifest(object):'
251 def __contains__(self, key):
266 def __contains__(self, key):
252 return self.bsearch(key) != -1
267 return self.bsearch(key) != -1
253
268
254 def _getflags(self, data, needle, pos):
255 start = pos + 41
256 end = data.find(b"\n", start)
257 if end == -1:
258 end = len(data) - 1
259 if start == end:
260 return b''
261 return self.data[start:end]
262
263 def __getitem__(self, key):
269 def __getitem__(self, key):
264 if not isinstance(key, bytes):
270 if not isinstance(key, bytes):
265 raise TypeError(b"getitem: manifest keys must be a bytes.")
271 raise TypeError(b"getitem: manifest keys must be a bytes.")
@@ -273,13 +279,17 b' class _lazymanifest(object):'
273 nlpos = data.find(b'\n', zeropos)
279 nlpos = data.find(b'\n', zeropos)
274 assert 0 <= needle <= len(self.positions)
280 assert 0 <= needle <= len(self.positions)
275 assert len(self.extrainfo) == len(self.positions)
281 assert len(self.extrainfo) == len(self.positions)
282 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
283 raise error.StorageError(b'Invalid manifest line')
276 hlen = nlpos - zeropos - 1
284 hlen = nlpos - zeropos - 1
277 # Hashes sometimes have an extra byte tucked on the end, so
285 flags = data[nlpos - 1 : nlpos]
278 # detect that.
286 if flags in _manifestflags:
279 if hlen % 2:
280 hlen -= 1
287 hlen -= 1
288 else:
289 flags = b''
290 if hlen not in (40, 64):
291 raise error.StorageError(b'Invalid manifest line')
281 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
292 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
282 flags = self._getflags(data, needle, zeropos)
283 return (hashval, flags)
293 return (hashval, flags)
284
294
285 def __delitem__(self, key):
295 def __delitem__(self, key):
@@ -408,9 +418,7 b' class _lazymanifest(object):'
408
418
409 def _pack(self, d):
419 def _pack(self, d):
410 n = d[1]
420 n = d[1]
411 if len(n) == 21 or len(n) == 33:
421 assert len(n) in (20, 32)
412 n = n[:-1]
413 assert len(n) == 20 or len(n) == 32
414 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
422 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
415
423
416 def text(self):
424 def text(self):
@@ -609,6 +617,8 b' class manifestdict(object):'
609 return self._lm.diff(m2._lm, clean)
617 return self._lm.diff(m2._lm, clean)
610
618
611 def setflag(self, key, flag):
619 def setflag(self, key, flag):
620 if flag not in _manifestflags:
621 raise TypeError(b"Invalid manifest flag set.")
612 self._lm[key] = self[key], flag
622 self._lm[key] = self[key], flag
613
623
614 def get(self, key, default=None):
624 def get(self, key, default=None):
@@ -1049,11 +1059,10 b' class treemanifest(object):'
1049 self._dirs[dir].__setitem__(subpath, n)
1059 self._dirs[dir].__setitem__(subpath, n)
1050 else:
1060 else:
1051 # manifest nodes are either 20 bytes or 32 bytes,
1061 # manifest nodes are either 20 bytes or 32 bytes,
1052 # depending on the hash in use. An extra byte is
1062 # depending on the hash in use. Assert this as historically
1053 # occasionally used by hg, but won't ever be
1063 # sometimes extra bytes were added.
1054 # persisted. Trim to 21 or 33 bytes as appropriate.
1064 assert len(n) in (20, 32)
1055 trim = 21 if len(n) < 25 else 33
1065 self._files[f] = n
1056 self._files[f] = n[:trim] # to match manifestdict's behavior
1057 self._dirty = True
1066 self._dirty = True
1058
1067
1059 def _load(self):
1068 def _load(self):
@@ -1066,6 +1075,8 b' class treemanifest(object):'
1066
1075
1067 def setflag(self, f, flags):
1076 def setflag(self, f, flags):
1068 """Set the flags (symlink, executable) for path f."""
1077 """Set the flags (symlink, executable) for path f."""
1078 if flags not in _manifestflags:
1079 raise TypeError(b"Invalid manifest flag set.")
1069 self._load()
1080 self._load()
1070 dir, subpath = _splittopdir(f)
1081 dir, subpath = _splittopdir(f)
1071 if dir:
1082 if dir:
@@ -725,8 +725,7 b' def manifestmerge('
725 b'prompt changed/deleted',
725 b'prompt changed/deleted',
726 )
726 )
727 elif n1 == addednodeid:
727 elif n1 == addednodeid:
728 # This extra 'a' is added by working copy manifest to mark
728 # This file was locally added. We should forget it instead of
729 # the file as locally added. We should forget it instead of
730 # deleting it.
729 # deleting it.
731 actions[f] = (
730 actions[f] = (
732 mergestatemod.ACTION_FORGET,
731 mergestatemod.ACTION_FORGET,
@@ -156,39 +156,6 b' class basemanifesttests(object):'
156 with self.assertRaises(KeyError):
156 with self.assertRaises(KeyError):
157 m[b'foo']
157 m[b'foo']
158
158
159 def testSetGetNodeSuffix(self):
160 clean = self.parsemanifest(A_SHORT_MANIFEST)
161 m = self.parsemanifest(A_SHORT_MANIFEST)
162 h = m[b'foo']
163 f = m.flags(b'foo')
164 want = h + b'a'
165 # Merge code wants to set 21-byte fake hashes at times
166 m[b'foo'] = want
167 self.assertEqual(want, m[b'foo'])
168 self.assertEqual(
169 [(b'bar/baz/qux.py', BIN_HASH_2), (b'foo', BIN_HASH_1 + b'a')],
170 list(m.items()),
171 )
172 # Sometimes it even tries a 22-byte fake hash, but we can
173 # return 21 and it'll work out
174 m[b'foo'] = want + b'+'
175 self.assertEqual(want, m[b'foo'])
176 # make sure the suffix survives a copy
177 match = matchmod.match(util.localpath(b'/repo'), b'', [b're:foo'])
178 m2 = m._matches(match)
179 self.assertEqual(want, m2[b'foo'])
180 self.assertEqual(1, len(m2))
181 m2 = m.copy()
182 self.assertEqual(want, m2[b'foo'])
183 # suffix with iteration
184 self.assertEqual(
185 [(b'bar/baz/qux.py', BIN_HASH_2), (b'foo', want)], list(m.items())
186 )
187
188 # shows up in diff
189 self.assertEqual({b'foo': ((want, f), (h, b''))}, m.diff(clean))
190 self.assertEqual({b'foo': ((h, b''), (want, f))}, clean.diff(m))
191
192 def testMatchException(self):
159 def testMatchException(self):
193 m = self.parsemanifest(A_SHORT_MANIFEST)
160 m = self.parsemanifest(A_SHORT_MANIFEST)
194 match = matchmod.match(util.localpath(b'/repo'), b'', [b're:.*'])
161 match = matchmod.match(util.localpath(b'/repo'), b'', [b're:.*'])
General Comments 0
You need to be logged in to leave comments. Login now