##// END OF EJS Templates
dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal -
r18567:194e63c1 default
parent child Browse files
Show More
@@ -1,816 +1,783 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 import errno
8 8
9 9 from node import nullid
10 10 from i18n import _
11 11 import scmutil, util, ignore, osutil, parsers, encoding
12 import struct, os, stat, errno
13 import cStringIO
12 import os, stat, errno
14 13
15 _format = ">cllll"
16 14 propertycache = util.propertycache
17 15 filecache = scmutil.filecache
18 16 _rangemask = 0x7fffffff
19 17
20 18 class repocache(filecache):
21 19 """filecache for files in .hg/"""
22 20 def join(self, obj, fname):
23 21 return obj._opener.join(fname)
24 22
25 23 class rootcache(filecache):
26 24 """filecache for files in the repository root"""
27 25 def join(self, obj, fname):
28 26 return obj._join(fname)
29 27
30 28 def _finddirs(path):
31 29 pos = path.rfind('/')
32 30 while pos != -1:
33 31 yield path[:pos]
34 32 pos = path.rfind('/', 0, pos)
35 33
36 34 def _incdirs(dirs, path):
37 35 for base in _finddirs(path):
38 36 if base in dirs:
39 37 dirs[base] += 1
40 38 return
41 39 dirs[base] = 1
42 40
43 41 def _decdirs(dirs, path):
44 42 for base in _finddirs(path):
45 43 if dirs[base] > 1:
46 44 dirs[base] -= 1
47 45 return
48 46 del dirs[base]
49 47
50 48 class dirstate(object):
51 49
52 50 def __init__(self, opener, ui, root, validate):
53 51 '''Create a new dirstate object.
54 52
55 53 opener is an open()-like callable that can be used to open the
56 54 dirstate file; root is the root of the directory tracked by
57 55 the dirstate.
58 56 '''
59 57 self._opener = opener
60 58 self._validate = validate
61 59 self._root = root
62 60 self._rootdir = os.path.join(root, '')
63 61 self._dirty = False
64 62 self._dirtypl = False
65 63 self._lastnormaltime = 0
66 64 self._ui = ui
67 65 self._filecache = {}
68 66
69 67 @propertycache
70 68 def _map(self):
71 69 '''Return the dirstate contents as a map from filename to
72 70 (state, mode, size, time).'''
73 71 self._read()
74 72 return self._map
75 73
76 74 @propertycache
77 75 def _copymap(self):
78 76 self._read()
79 77 return self._copymap
80 78
81 79 @propertycache
82 80 def _foldmap(self):
83 81 f = {}
84 82 for name in self._map:
85 83 f[util.normcase(name)] = name
86 84 for name in self._dirs:
87 85 f[util.normcase(name)] = name
88 86 f['.'] = '.' # prevents useless util.fspath() invocation
89 87 return f
90 88
91 89 @repocache('branch')
92 90 def _branch(self):
93 91 try:
94 92 return self._opener.read("branch").strip() or "default"
95 93 except IOError, inst:
96 94 if inst.errno != errno.ENOENT:
97 95 raise
98 96 return "default"
99 97
100 98 @propertycache
101 99 def _pl(self):
102 100 try:
103 101 fp = self._opener("dirstate")
104 102 st = fp.read(40)
105 103 fp.close()
106 104 l = len(st)
107 105 if l == 40:
108 106 return st[:20], st[20:40]
109 107 elif l > 0 and l < 40:
110 108 raise util.Abort(_('working directory state appears damaged!'))
111 109 except IOError, err:
112 110 if err.errno != errno.ENOENT:
113 111 raise
114 112 return [nullid, nullid]
115 113
116 114 @propertycache
117 115 def _dirs(self):
118 116 dirs = {}
119 117 for f, s in self._map.iteritems():
120 118 if s[0] != 'r':
121 119 _incdirs(dirs, f)
122 120 return dirs
123 121
124 122 def dirs(self):
125 123 return self._dirs
126 124
127 125 @rootcache('.hgignore')
128 126 def _ignore(self):
129 127 files = [self._join('.hgignore')]
130 128 for name, path in self._ui.configitems("ui"):
131 129 if name == 'ignore' or name.startswith('ignore.'):
132 130 files.append(util.expandpath(path))
133 131 return ignore.ignore(self._root, files, self._ui.warn)
134 132
135 133 @propertycache
136 134 def _slash(self):
137 135 return self._ui.configbool('ui', 'slash') and os.sep != '/'
138 136
139 137 @propertycache
140 138 def _checklink(self):
141 139 return util.checklink(self._root)
142 140
143 141 @propertycache
144 142 def _checkexec(self):
145 143 return util.checkexec(self._root)
146 144
147 145 @propertycache
148 146 def _checkcase(self):
149 147 return not util.checkcase(self._join('.hg'))
150 148
151 149 def _join(self, f):
152 150 # much faster than os.path.join()
153 151 # it's safe because f is always a relative path
154 152 return self._rootdir + f
155 153
156 154 def flagfunc(self, buildfallback):
157 155 if self._checklink and self._checkexec:
158 156 def f(x):
159 157 p = self._join(x)
160 158 if os.path.islink(p):
161 159 return 'l'
162 160 if util.isexec(p):
163 161 return 'x'
164 162 return ''
165 163 return f
166 164
167 165 fallback = buildfallback()
168 166 if self._checklink:
169 167 def f(x):
170 168 if os.path.islink(self._join(x)):
171 169 return 'l'
172 170 if 'x' in fallback(x):
173 171 return 'x'
174 172 return ''
175 173 return f
176 174 if self._checkexec:
177 175 def f(x):
178 176 if 'l' in fallback(x):
179 177 return 'l'
180 178 if util.isexec(self._join(x)):
181 179 return 'x'
182 180 return ''
183 181 return f
184 182 else:
185 183 return fallback
186 184
187 185 def getcwd(self):
188 186 cwd = os.getcwd()
189 187 if cwd == self._root:
190 188 return ''
191 189 # self._root ends with a path separator if self._root is '/' or 'C:\'
192 190 rootsep = self._root
193 191 if not util.endswithsep(rootsep):
194 192 rootsep += os.sep
195 193 if cwd.startswith(rootsep):
196 194 return cwd[len(rootsep):]
197 195 else:
198 196 # we're outside the repo. return an absolute path.
199 197 return cwd
200 198
201 199 def pathto(self, f, cwd=None):
202 200 if cwd is None:
203 201 cwd = self.getcwd()
204 202 path = util.pathto(self._root, cwd, f)
205 203 if self._slash:
206 204 return util.normpath(path)
207 205 return path
208 206
209 207 def __getitem__(self, key):
210 208 '''Return the current state of key (a filename) in the dirstate.
211 209
212 210 States are:
213 211 n normal
214 212 m needs merging
215 213 r marked for removal
216 214 a marked for addition
217 215 ? not tracked
218 216 '''
219 217 return self._map.get(key, ("?",))[0]
220 218
221 219 def __contains__(self, key):
222 220 return key in self._map
223 221
224 222 def __iter__(self):
225 223 for x in sorted(self._map):
226 224 yield x
227 225
228 226 def parents(self):
229 227 return [self._validate(p) for p in self._pl]
230 228
231 229 def p1(self):
232 230 return self._validate(self._pl[0])
233 231
234 232 def p2(self):
235 233 return self._validate(self._pl[1])
236 234
237 235 def branch(self):
238 236 return encoding.tolocal(self._branch)
239 237
240 238 def setparents(self, p1, p2=nullid):
241 239 """Set dirstate parents to p1 and p2.
242 240
243 241 When moving from two parents to one, 'm' merged entries a
244 242 adjusted to normal and previous copy records discarded and
245 243 returned by the call.
246 244
247 245 See localrepo.setparents()
248 246 """
249 247 self._dirty = self._dirtypl = True
250 248 oldp2 = self._pl[1]
251 249 self._pl = p1, p2
252 250 copies = {}
253 251 if oldp2 != nullid and p2 == nullid:
254 252 # Discard 'm' markers when moving away from a merge state
255 253 for f, s in self._map.iteritems():
256 254 if s[0] == 'm':
257 255 if f in self._copymap:
258 256 copies[f] = self._copymap[f]
259 257 self.normallookup(f)
260 258 return copies
261 259
262 260 def setbranch(self, branch):
263 261 self._branch = encoding.fromlocal(branch)
264 262 f = self._opener('branch', 'w', atomictemp=True)
265 263 try:
266 264 f.write(self._branch + '\n')
267 265 f.close()
268 266
269 267 # make sure filecache has the correct stat info for _branch after
270 268 # replacing the underlying file
271 269 ce = self._filecache['_branch']
272 270 if ce:
273 271 ce.refresh()
274 272 except: # re-raises
275 273 f.discard()
276 274 raise
277 275
278 276 def _read(self):
279 277 self._map = {}
280 278 self._copymap = {}
281 279 try:
282 280 st = self._opener.read("dirstate")
283 281 except IOError, err:
284 282 if err.errno != errno.ENOENT:
285 283 raise
286 284 return
287 285 if not st:
288 286 return
289 287
290 288 p = parsers.parse_dirstate(self._map, self._copymap, st)
291 289 if not self._dirtypl:
292 290 self._pl = p
293 291
294 292 def invalidate(self):
295 293 for a in ("_map", "_copymap", "_foldmap", "_branch", "_pl", "_dirs",
296 294 "_ignore"):
297 295 if a in self.__dict__:
298 296 delattr(self, a)
299 297 self._lastnormaltime = 0
300 298 self._dirty = False
301 299
302 300 def copy(self, source, dest):
303 301 """Mark dest as a copy of source. Unmark dest if source is None."""
304 302 if source == dest:
305 303 return
306 304 self._dirty = True
307 305 if source is not None:
308 306 self._copymap[dest] = source
309 307 elif dest in self._copymap:
310 308 del self._copymap[dest]
311 309
312 310 def copied(self, file):
313 311 return self._copymap.get(file, None)
314 312
315 313 def copies(self):
316 314 return self._copymap
317 315
318 316 def _droppath(self, f):
319 317 if self[f] not in "?r" and "_dirs" in self.__dict__:
320 318 _decdirs(self._dirs, f)
321 319
322 320 def _addpath(self, f, state, mode, size, mtime):
323 321 oldstate = self[f]
324 322 if state == 'a' or oldstate == 'r':
325 323 scmutil.checkfilename(f)
326 324 if f in self._dirs:
327 325 raise util.Abort(_('directory %r already in dirstate') % f)
328 326 # shadows
329 327 for d in _finddirs(f):
330 328 if d in self._dirs:
331 329 break
332 330 if d in self._map and self[d] != 'r':
333 331 raise util.Abort(
334 332 _('file %r in dirstate clashes with %r') % (d, f))
335 333 if oldstate in "?r" and "_dirs" in self.__dict__:
336 334 _incdirs(self._dirs, f)
337 335 self._dirty = True
338 336 self._map[f] = (state, mode, size, mtime)
339 337
340 338 def normal(self, f):
341 339 '''Mark a file normal and clean.'''
342 340 s = os.lstat(self._join(f))
343 341 mtime = int(s.st_mtime)
344 342 self._addpath(f, 'n', s.st_mode,
345 343 s.st_size & _rangemask, mtime & _rangemask)
346 344 if f in self._copymap:
347 345 del self._copymap[f]
348 346 if mtime > self._lastnormaltime:
349 347 # Remember the most recent modification timeslot for status(),
350 348 # to make sure we won't miss future size-preserving file content
351 349 # modifications that happen within the same timeslot.
352 350 self._lastnormaltime = mtime
353 351
354 352 def normallookup(self, f):
355 353 '''Mark a file normal, but possibly dirty.'''
356 354 if self._pl[1] != nullid and f in self._map:
357 355 # if there is a merge going on and the file was either
358 356 # in state 'm' (-1) or coming from other parent (-2) before
359 357 # being removed, restore that state.
360 358 entry = self._map[f]
361 359 if entry[0] == 'r' and entry[2] in (-1, -2):
362 360 source = self._copymap.get(f)
363 361 if entry[2] == -1:
364 362 self.merge(f)
365 363 elif entry[2] == -2:
366 364 self.otherparent(f)
367 365 if source:
368 366 self.copy(source, f)
369 367 return
370 368 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
371 369 return
372 370 self._addpath(f, 'n', 0, -1, -1)
373 371 if f in self._copymap:
374 372 del self._copymap[f]
375 373
376 374 def otherparent(self, f):
377 375 '''Mark as coming from the other parent, always dirty.'''
378 376 if self._pl[1] == nullid:
379 377 raise util.Abort(_("setting %r to other parent "
380 378 "only allowed in merges") % f)
381 379 self._addpath(f, 'n', 0, -2, -1)
382 380 if f in self._copymap:
383 381 del self._copymap[f]
384 382
385 383 def add(self, f):
386 384 '''Mark a file added.'''
387 385 self._addpath(f, 'a', 0, -1, -1)
388 386 if f in self._copymap:
389 387 del self._copymap[f]
390 388
391 389 def remove(self, f):
392 390 '''Mark a file removed.'''
393 391 self._dirty = True
394 392 self._droppath(f)
395 393 size = 0
396 394 if self._pl[1] != nullid and f in self._map:
397 395 # backup the previous state
398 396 entry = self._map[f]
399 397 if entry[0] == 'm': # merge
400 398 size = -1
401 399 elif entry[0] == 'n' and entry[2] == -2: # other parent
402 400 size = -2
403 401 self._map[f] = ('r', 0, size, 0)
404 402 if size == 0 and f in self._copymap:
405 403 del self._copymap[f]
406 404
407 405 def merge(self, f):
408 406 '''Mark a file merged.'''
409 407 if self._pl[1] == nullid:
410 408 return self.normallookup(f)
411 409 s = os.lstat(self._join(f))
412 410 self._addpath(f, 'm', s.st_mode,
413 411 s.st_size & _rangemask, int(s.st_mtime) & _rangemask)
414 412 if f in self._copymap:
415 413 del self._copymap[f]
416 414
417 415 def drop(self, f):
418 416 '''Drop a file from the dirstate'''
419 417 if f in self._map:
420 418 self._dirty = True
421 419 self._droppath(f)
422 420 del self._map[f]
423 421
424 422 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
425 423 normed = util.normcase(path)
426 424 folded = self._foldmap.get(normed, None)
427 425 if folded is None:
428 426 if isknown:
429 427 folded = path
430 428 else:
431 429 if exists is None:
432 430 exists = os.path.lexists(os.path.join(self._root, path))
433 431 if not exists:
434 432 # Maybe a path component exists
435 433 if not ignoremissing and '/' in path:
436 434 d, f = path.rsplit('/', 1)
437 435 d = self._normalize(d, isknown, ignoremissing, None)
438 436 folded = d + "/" + f
439 437 else:
440 438 # No path components, preserve original case
441 439 folded = path
442 440 else:
443 441 # recursively normalize leading directory components
444 442 # against dirstate
445 443 if '/' in normed:
446 444 d, f = normed.rsplit('/', 1)
447 445 d = self._normalize(d, isknown, ignoremissing, True)
448 446 r = self._root + "/" + d
449 447 folded = d + "/" + util.fspath(f, r)
450 448 else:
451 449 folded = util.fspath(normed, self._root)
452 450 self._foldmap[normed] = folded
453 451
454 452 return folded
455 453
456 454 def normalize(self, path, isknown=False, ignoremissing=False):
457 455 '''
458 456 normalize the case of a pathname when on a casefolding filesystem
459 457
460 458 isknown specifies whether the filename came from walking the
461 459 disk, to avoid extra filesystem access.
462 460
463 461 If ignoremissing is True, missing path are returned
464 462 unchanged. Otherwise, we try harder to normalize possibly
465 463 existing path components.
466 464
467 465 The normalized case is determined based on the following precedence:
468 466
469 467 - version of name already stored in the dirstate
470 468 - version of name stored on disk
471 469 - version provided via command arguments
472 470 '''
473 471
474 472 if self._checkcase:
475 473 return self._normalize(path, isknown, ignoremissing)
476 474 return path
477 475
478 476 def clear(self):
479 477 self._map = {}
480 478 if "_dirs" in self.__dict__:
481 479 delattr(self, "_dirs")
482 480 self._copymap = {}
483 481 self._pl = [nullid, nullid]
484 482 self._lastnormaltime = 0
485 483 self._dirty = True
486 484
487 485 def rebuild(self, parent, files):
488 486 self.clear()
489 487 for f in files:
490 488 if 'x' in files.flags(f):
491 489 self._map[f] = ('n', 0777, -1, 0)
492 490 else:
493 491 self._map[f] = ('n', 0666, -1, 0)
494 492 self._pl = (parent, nullid)
495 493 self._dirty = True
496 494
497 495 def write(self):
498 496 if not self._dirty:
499 497 return
500 498 st = self._opener("dirstate", "w", atomictemp=True)
501 499
502 500 def finish(s):
503 501 st.write(s)
504 502 st.close()
505 503 self._lastnormaltime = 0
506 504 self._dirty = self._dirtypl = False
507 505
508 506 # use the modification time of the newly created temporary file as the
509 507 # filesystem's notion of 'now'
510 508 now = util.fstat(st).st_mtime
511 copymap = self._copymap
512 try:
513 finish(parsers.pack_dirstate(self._map, copymap, self._pl, now))
514 return
515 except AttributeError:
516 pass
517
518 now = int(now)
519 cs = cStringIO.StringIO()
520 pack = struct.pack
521 write = cs.write
522 write("".join(self._pl))
523 for f, e in self._map.iteritems():
524 if e[0] == 'n' and e[3] == now:
525 # The file was last modified "simultaneously" with the current
526 # write to dirstate (i.e. within the same second for file-
527 # systems with a granularity of 1 sec). This commonly happens
528 # for at least a couple of files on 'update'.
529 # The user could change the file without changing its size
530 # within the same second. Invalidate the file's stat data in
531 # dirstate, forcing future 'status' calls to compare the
532 # contents of the file. This prevents mistakenly treating such
533 # files as clean.
534 e = (e[0], 0, -1, -1) # mark entry as 'unset'
535 self._map[f] = e
536
537 if f in copymap:
538 f = "%s\0%s" % (f, copymap[f])
539 e = pack(_format, e[0], e[1], e[2], e[3], len(f))
540 write(e)
541 write(f)
542 finish(cs.getvalue())
509 finish(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
543 510
544 511 def _dirignore(self, f):
545 512 if f == '.':
546 513 return False
547 514 if self._ignore(f):
548 515 return True
549 516 for p in _finddirs(f):
550 517 if self._ignore(p):
551 518 return True
552 519 return False
553 520
554 521 def walk(self, match, subrepos, unknown, ignored):
555 522 '''
556 523 Walk recursively through the directory tree, finding all files
557 524 matched by match.
558 525
559 526 Return a dict mapping filename to stat-like object (either
560 527 mercurial.osutil.stat instance or return value of os.stat()).
561 528 '''
562 529
563 530 def fwarn(f, msg):
564 531 self._ui.warn('%s: %s\n' % (self.pathto(f), msg))
565 532 return False
566 533
567 534 def badtype(mode):
568 535 kind = _('unknown')
569 536 if stat.S_ISCHR(mode):
570 537 kind = _('character device')
571 538 elif stat.S_ISBLK(mode):
572 539 kind = _('block device')
573 540 elif stat.S_ISFIFO(mode):
574 541 kind = _('fifo')
575 542 elif stat.S_ISSOCK(mode):
576 543 kind = _('socket')
577 544 elif stat.S_ISDIR(mode):
578 545 kind = _('directory')
579 546 return _('unsupported file type (type is %s)') % kind
580 547
581 548 ignore = self._ignore
582 549 dirignore = self._dirignore
583 550 if ignored:
584 551 ignore = util.never
585 552 dirignore = util.never
586 553 elif not unknown:
587 554 # if unknown and ignored are False, skip step 2
588 555 ignore = util.always
589 556 dirignore = util.always
590 557
591 558 matchfn = match.matchfn
592 559 badfn = match.bad
593 560 dmap = self._map
594 561 normpath = util.normpath
595 562 listdir = osutil.listdir
596 563 lstat = os.lstat
597 564 getkind = stat.S_IFMT
598 565 dirkind = stat.S_IFDIR
599 566 regkind = stat.S_IFREG
600 567 lnkkind = stat.S_IFLNK
601 568 join = self._join
602 569 work = []
603 570 wadd = work.append
604 571
605 572 exact = skipstep3 = False
606 573 if matchfn == match.exact: # match.exact
607 574 exact = True
608 575 dirignore = util.always # skip step 2
609 576 elif match.files() and not match.anypats(): # match.match, no patterns
610 577 skipstep3 = True
611 578
612 579 if not exact and self._checkcase:
613 580 normalize = self._normalize
614 581 skipstep3 = False
615 582 else:
616 583 normalize = None
617 584
618 585 files = sorted(match.files())
619 586 subrepos.sort()
620 587 i, j = 0, 0
621 588 while i < len(files) and j < len(subrepos):
622 589 subpath = subrepos[j] + "/"
623 590 if files[i] < subpath:
624 591 i += 1
625 592 continue
626 593 while i < len(files) and files[i].startswith(subpath):
627 594 del files[i]
628 595 j += 1
629 596
630 597 if not files or '.' in files:
631 598 files = ['']
632 599 results = dict.fromkeys(subrepos)
633 600 results['.hg'] = None
634 601
635 602 # step 1: find all explicit files
636 603 for ff in files:
637 604 if normalize:
638 605 nf = normalize(normpath(ff), False, True)
639 606 else:
640 607 nf = normpath(ff)
641 608 if nf in results:
642 609 continue
643 610
644 611 try:
645 612 st = lstat(join(nf))
646 613 kind = getkind(st.st_mode)
647 614 if kind == dirkind:
648 615 skipstep3 = False
649 616 if nf in dmap:
650 617 #file deleted on disk but still in dirstate
651 618 results[nf] = None
652 619 match.dir(nf)
653 620 if not dirignore(nf):
654 621 wadd(nf)
655 622 elif kind == regkind or kind == lnkkind:
656 623 results[nf] = st
657 624 else:
658 625 badfn(ff, badtype(kind))
659 626 if nf in dmap:
660 627 results[nf] = None
661 628 except OSError, inst:
662 629 if nf in dmap: # does it exactly match a file?
663 630 results[nf] = None
664 631 else: # does it match a directory?
665 632 prefix = nf + "/"
666 633 for fn in dmap:
667 634 if fn.startswith(prefix):
668 635 match.dir(nf)
669 636 skipstep3 = False
670 637 break
671 638 else:
672 639 badfn(ff, inst.strerror)
673 640
674 641 # step 2: visit subdirectories
675 642 while work:
676 643 nd = work.pop()
677 644 skip = None
678 645 if nd == '.':
679 646 nd = ''
680 647 else:
681 648 skip = '.hg'
682 649 try:
683 650 entries = listdir(join(nd), stat=True, skip=skip)
684 651 except OSError, inst:
685 652 if inst.errno in (errno.EACCES, errno.ENOENT):
686 653 fwarn(nd, inst.strerror)
687 654 continue
688 655 raise
689 656 for f, kind, st in entries:
690 657 if normalize:
691 658 nf = normalize(nd and (nd + "/" + f) or f, True, True)
692 659 else:
693 660 nf = nd and (nd + "/" + f) or f
694 661 if nf not in results:
695 662 if kind == dirkind:
696 663 if not ignore(nf):
697 664 match.dir(nf)
698 665 wadd(nf)
699 666 if nf in dmap and matchfn(nf):
700 667 results[nf] = None
701 668 elif kind == regkind or kind == lnkkind:
702 669 if nf in dmap:
703 670 if matchfn(nf):
704 671 results[nf] = st
705 672 elif matchfn(nf) and not ignore(nf):
706 673 results[nf] = st
707 674 elif nf in dmap and matchfn(nf):
708 675 results[nf] = None
709 676
710 677 # step 3: report unseen items in the dmap hash
711 678 if not skipstep3 and not exact:
712 679 visit = sorted([f for f in dmap if f not in results and matchfn(f)])
713 680 nf = iter(visit).next
714 681 for st in util.statfiles([join(i) for i in visit]):
715 682 results[nf()] = st
716 683 for s in subrepos:
717 684 del results[s]
718 685 del results['.hg']
719 686 return results
720 687
721 688 def status(self, match, subrepos, ignored, clean, unknown):
722 689 '''Determine the status of the working copy relative to the
723 690 dirstate and return a tuple of lists (unsure, modified, added,
724 691 removed, deleted, unknown, ignored, clean), where:
725 692
726 693 unsure:
727 694 files that might have been modified since the dirstate was
728 695 written, but need to be read to be sure (size is the same
729 696 but mtime differs)
730 697 modified:
731 698 files that have definitely been modified since the dirstate
732 699 was written (different size or mode)
733 700 added:
734 701 files that have been explicitly added with hg add
735 702 removed:
736 703 files that have been explicitly removed with hg remove
737 704 deleted:
738 705 files that have been deleted through other means ("missing")
739 706 unknown:
740 707 files not in the dirstate that are not ignored
741 708 ignored:
742 709 files not in the dirstate that are ignored
743 710 (by _dirignore())
744 711 clean:
745 712 files that have definitely not been modified since the
746 713 dirstate was written
747 714 '''
748 715 listignored, listclean, listunknown = ignored, clean, unknown
749 716 lookup, modified, added, unknown, ignored = [], [], [], [], []
750 717 removed, deleted, clean = [], [], []
751 718
752 719 dmap = self._map
753 720 ladd = lookup.append # aka "unsure"
754 721 madd = modified.append
755 722 aadd = added.append
756 723 uadd = unknown.append
757 724 iadd = ignored.append
758 725 radd = removed.append
759 726 dadd = deleted.append
760 727 cadd = clean.append
761 728 mexact = match.exact
762 729 dirignore = self._dirignore
763 730 checkexec = self._checkexec
764 731 checklink = self._checklink
765 732 copymap = self._copymap
766 733 lastnormaltime = self._lastnormaltime
767 734
768 735 lnkkind = stat.S_IFLNK
769 736
770 737 for fn, st in self.walk(match, subrepos, listunknown,
771 738 listignored).iteritems():
772 739 if fn not in dmap:
773 740 if (listignored or mexact(fn)) and dirignore(fn):
774 741 if listignored:
775 742 iadd(fn)
776 743 elif listunknown:
777 744 uadd(fn)
778 745 continue
779 746
780 747 state, mode, size, time = dmap[fn]
781 748
782 749 if not st and state in "nma":
783 750 dadd(fn)
784 751 elif state == 'n':
785 752 # The "mode & lnkkind != lnkkind or self._checklink"
786 753 # lines are an expansion of "islink => checklink"
787 754 # where islink means "is this a link?" and checklink
788 755 # means "can we check links?".
789 756 mtime = int(st.st_mtime)
790 757 if (size >= 0 and
791 758 ((size != st.st_size and size != st.st_size & _rangemask)
792 759 or ((mode ^ st.st_mode) & 0100 and checkexec))
793 760 and (mode & lnkkind != lnkkind or checklink)
794 761 or size == -2 # other parent
795 762 or fn in copymap):
796 763 madd(fn)
797 764 elif ((time != mtime and time != mtime & _rangemask)
798 765 and (mode & lnkkind != lnkkind or checklink)):
799 766 ladd(fn)
800 767 elif mtime == lastnormaltime:
801 768 # fn may have been changed in the same timeslot without
802 769 # changing its size. This can happen if we quickly do
803 770 # multiple commits in a single transaction.
804 771 # Force lookup, so we don't miss such a racy file change.
805 772 ladd(fn)
806 773 elif listclean:
807 774 cadd(fn)
808 775 elif state == 'm':
809 776 madd(fn)
810 777 elif state == 'a':
811 778 aadd(fn)
812 779 elif state == 'r':
813 780 radd(fn)
814 781
815 782 return (lookup, modified, added, removed, deleted, unknown, ignored,
816 783 clean)
@@ -1,1568 +1,1569 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static inline int hexdigit(const char *p, Py_ssize_t off)
18 18 {
19 19 char c = p[off];
20 20
21 21 if (c >= '0' && c <= '9')
22 22 return c - '0';
23 23 if (c >= 'a' && c <= 'f')
24 24 return c - 'a' + 10;
25 25 if (c >= 'A' && c <= 'F')
26 26 return c - 'A' + 10;
27 27
28 28 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
29 29 return 0;
30 30 }
31 31
32 32 /*
33 33 * Turn a hex-encoded string into binary.
34 34 */
35 35 static PyObject *unhexlify(const char *str, int len)
36 36 {
37 37 PyObject *ret;
38 38 char *d;
39 39 int i;
40 40
41 41 ret = PyBytes_FromStringAndSize(NULL, len / 2);
42 42
43 43 if (!ret)
44 44 return NULL;
45 45
46 46 d = PyBytes_AsString(ret);
47 47
48 48 for (i = 0; i < len;) {
49 49 int hi = hexdigit(str, i++);
50 50 int lo = hexdigit(str, i++);
51 51 *d++ = (hi << 4) | lo;
52 52 }
53 53
54 54 return ret;
55 55 }
56 56
57 57 /*
58 58 * This code assumes that a manifest is stitched together with newline
59 59 * ('\n') characters.
60 60 */
61 61 static PyObject *parse_manifest(PyObject *self, PyObject *args)
62 62 {
63 63 PyObject *mfdict, *fdict;
64 64 char *str, *cur, *start, *zero;
65 65 int len;
66 66
67 67 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
68 68 &PyDict_Type, &mfdict,
69 69 &PyDict_Type, &fdict,
70 70 &str, &len))
71 71 goto quit;
72 72
73 73 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
74 74 PyObject *file = NULL, *node = NULL;
75 75 PyObject *flags = NULL;
76 76 ptrdiff_t nlen;
77 77
78 78 if (!*cur) {
79 79 zero = cur;
80 80 continue;
81 81 }
82 82 else if (*cur != '\n')
83 83 continue;
84 84
85 85 if (!zero) {
86 86 PyErr_SetString(PyExc_ValueError,
87 87 "manifest entry has no separator");
88 88 goto quit;
89 89 }
90 90
91 91 file = PyBytes_FromStringAndSize(start, zero - start);
92 92
93 93 if (!file)
94 94 goto bail;
95 95
96 96 nlen = cur - zero - 1;
97 97
98 98 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
99 99 if (!node)
100 100 goto bail;
101 101
102 102 if (nlen > 40) {
103 103 flags = PyBytes_FromStringAndSize(zero + 41,
104 104 nlen - 40);
105 105 if (!flags)
106 106 goto bail;
107 107
108 108 if (PyDict_SetItem(fdict, file, flags) == -1)
109 109 goto bail;
110 110 }
111 111
112 112 if (PyDict_SetItem(mfdict, file, node) == -1)
113 113 goto bail;
114 114
115 115 start = cur + 1;
116 116 zero = NULL;
117 117
118 118 Py_XDECREF(flags);
119 119 Py_XDECREF(node);
120 120 Py_XDECREF(file);
121 121 continue;
122 122 bail:
123 123 Py_XDECREF(flags);
124 124 Py_XDECREF(node);
125 125 Py_XDECREF(file);
126 126 goto quit;
127 127 }
128 128
129 129 if (len > 0 && *(cur - 1) != '\n') {
130 130 PyErr_SetString(PyExc_ValueError,
131 131 "manifest contains trailing garbage");
132 132 goto quit;
133 133 }
134 134
135 135 Py_INCREF(Py_None);
136 136 return Py_None;
137 137 quit:
138 138 return NULL;
139 139 }
140 140
141 141 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
142 142 {
143 143 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
144 144 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
145 145 char *str, *cur, *end, *cpos;
146 146 int state, mode, size, mtime;
147 147 unsigned int flen;
148 148 int len;
149 149
150 150 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
151 151 &PyDict_Type, &dmap,
152 152 &PyDict_Type, &cmap,
153 153 &str, &len))
154 154 goto quit;
155 155
156 156 /* read parents */
157 157 if (len < 40)
158 158 goto quit;
159 159
160 160 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
161 161 if (!parents)
162 162 goto quit;
163 163
164 164 /* read filenames */
165 165 cur = str + 40;
166 166 end = str + len;
167 167
168 168 while (cur < end - 17) {
169 169 /* unpack header */
170 170 state = *cur;
171 171 mode = getbe32(cur + 1);
172 172 size = getbe32(cur + 5);
173 173 mtime = getbe32(cur + 9);
174 174 flen = getbe32(cur + 13);
175 175 cur += 17;
176 176 if (cur + flen > end || cur + flen < cur) {
177 177 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
178 178 goto quit;
179 179 }
180 180
181 181 entry = Py_BuildValue("ciii", state, mode, size, mtime);
182 182 if (!entry)
183 183 goto quit;
184 184 PyObject_GC_UnTrack(entry); /* don't waste time with this */
185 185
186 186 cpos = memchr(cur, 0, flen);
187 187 if (cpos) {
188 188 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
189 189 cname = PyBytes_FromStringAndSize(cpos + 1,
190 190 flen - (cpos - cur) - 1);
191 191 if (!fname || !cname ||
192 192 PyDict_SetItem(cmap, fname, cname) == -1 ||
193 193 PyDict_SetItem(dmap, fname, entry) == -1)
194 194 goto quit;
195 195 Py_DECREF(cname);
196 196 } else {
197 197 fname = PyBytes_FromStringAndSize(cur, flen);
198 198 if (!fname ||
199 199 PyDict_SetItem(dmap, fname, entry) == -1)
200 200 goto quit;
201 201 }
202 202 cur += flen;
203 203 Py_DECREF(fname);
204 204 Py_DECREF(entry);
205 205 fname = cname = entry = NULL;
206 206 }
207 207
208 208 ret = parents;
209 209 Py_INCREF(ret);
210 210 quit:
211 211 Py_XDECREF(fname);
212 212 Py_XDECREF(cname);
213 213 Py_XDECREF(entry);
214 214 Py_XDECREF(parents);
215 215 return ret;
216 216 }
217 217
218 218 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
219 219 {
220 220 PyObject *o = PyTuple_GET_ITEM(tuple, off);
221 221 long val;
222 222
223 223 if (PyInt_Check(o))
224 224 val = PyInt_AS_LONG(o);
225 225 else if (PyLong_Check(o)) {
226 226 val = PyLong_AsLong(o);
227 227 if (val == -1 && PyErr_Occurred())
228 228 return -1;
229 229 } else {
230 230 PyErr_SetString(PyExc_TypeError, "expected an int or long");
231 231 return -1;
232 232 }
233 233 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
234 234 PyErr_SetString(PyExc_OverflowError,
235 235 "Python value to large to convert to uint32_t");
236 236 return -1;
237 237 }
238 238 *v = (uint32_t)val;
239 239 return 0;
240 240 }
241 241
242 242 static PyObject *dirstate_unset;
243 243
244 244 /*
245 245 * Efficiently pack a dirstate object into its on-disk format.
246 246 */
247 247 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
248 248 {
249 249 PyObject *packobj = NULL;
250 250 PyObject *map, *copymap, *pl;
251 251 Py_ssize_t nbytes, pos, l;
252 252 PyObject *k, *v, *pn;
253 253 char *p, *s;
254 254 double now;
255 255
256 256 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
257 257 &PyDict_Type, &map, &PyDict_Type, &copymap,
258 258 &pl, &now))
259 259 return NULL;
260 260
261 261 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
262 262 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
263 263 return NULL;
264 264 }
265 265
266 266 /* Figure out how much we need to allocate. */
267 267 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
268 268 PyObject *c;
269 269 if (!PyString_Check(k)) {
270 270 PyErr_SetString(PyExc_TypeError, "expected string key");
271 271 goto bail;
272 272 }
273 273 nbytes += PyString_GET_SIZE(k) + 17;
274 274 c = PyDict_GetItem(copymap, k);
275 275 if (c) {
276 276 if (!PyString_Check(c)) {
277 277 PyErr_SetString(PyExc_TypeError,
278 278 "expected string key");
279 279 goto bail;
280 280 }
281 281 nbytes += PyString_GET_SIZE(c) + 1;
282 282 }
283 283 }
284 284
285 285 packobj = PyString_FromStringAndSize(NULL, nbytes);
286 286 if (packobj == NULL)
287 287 goto bail;
288 288
289 289 p = PyString_AS_STRING(packobj);
290 290
291 291 pn = PySequence_ITEM(pl, 0);
292 292 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
293 293 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
294 294 goto bail;
295 295 }
296 296 memcpy(p, s, l);
297 297 p += 20;
298 298 pn = PySequence_ITEM(pl, 1);
299 299 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
300 300 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
301 301 goto bail;
302 302 }
303 303 memcpy(p, s, l);
304 304 p += 20;
305 305
306 306 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
307 307 uint32_t mode, size, mtime;
308 308 Py_ssize_t len, l;
309 309 PyObject *o;
310 310 char *s, *t;
311 311
312 312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
313 313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
314 314 goto bail;
315 315 }
316 316 o = PyTuple_GET_ITEM(v, 0);
317 317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
318 318 PyErr_SetString(PyExc_TypeError, "expected one byte");
319 319 goto bail;
320 320 }
321 321 *p++ = *s;
322 322 if (getintat(v, 1, &mode) == -1)
323 323 goto bail;
324 324 if (getintat(v, 2, &size) == -1)
325 325 goto bail;
326 326 if (getintat(v, 3, &mtime) == -1)
327 327 goto bail;
328 328 if (*s == 'n' && mtime == (uint32_t)now) {
329 /* See dirstate.py:write for why we do this. */
329 /* See pure/parsers.py:pack_dirstate for why we do
330 * this. */
330 331 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
331 332 goto bail;
332 333 mode = 0, size = -1, mtime = -1;
333 334 }
334 335 putbe32(mode, p);
335 336 putbe32(size, p + 4);
336 337 putbe32(mtime, p + 8);
337 338 t = p + 12;
338 339 p += 16;
339 340 len = PyString_GET_SIZE(k);
340 341 memcpy(p, PyString_AS_STRING(k), len);
341 342 p += len;
342 343 o = PyDict_GetItem(copymap, k);
343 344 if (o) {
344 345 *p++ = '\0';
345 346 l = PyString_GET_SIZE(o);
346 347 memcpy(p, PyString_AS_STRING(o), l);
347 348 p += l;
348 349 len += l + 1;
349 350 }
350 351 putbe32((uint32_t)len, t);
351 352 }
352 353
353 354 pos = p - PyString_AS_STRING(packobj);
354 355 if (pos != nbytes) {
355 356 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
356 357 (long)pos, (long)nbytes);
357 358 goto bail;
358 359 }
359 360
360 361 return packobj;
361 362 bail:
362 363 Py_XDECREF(packobj);
363 364 return NULL;
364 365 }
365 366
366 367 /*
367 368 * A base-16 trie for fast node->rev mapping.
368 369 *
369 370 * Positive value is index of the next node in the trie
370 371 * Negative value is a leaf: -(rev + 1)
371 372 * Zero is empty
372 373 */
373 374 typedef struct {
374 375 int children[16];
375 376 } nodetree;
376 377
377 378 /*
378 379 * This class has two behaviours.
379 380 *
380 381 * When used in a list-like way (with integer keys), we decode an
381 382 * entry in a RevlogNG index file on demand. Our last entry is a
382 383 * sentinel, always a nullid. We have limited support for
383 384 * integer-keyed insert and delete, only at elements right before the
384 385 * sentinel.
385 386 *
386 387 * With string keys, we lazily perform a reverse mapping from node to
387 388 * rev, using a base-16 trie.
388 389 */
389 390 typedef struct {
390 391 PyObject_HEAD
391 392 /* Type-specific fields go here. */
392 393 PyObject *data; /* raw bytes of index */
393 394 PyObject **cache; /* cached tuples */
394 395 const char **offsets; /* populated on demand */
395 396 Py_ssize_t raw_length; /* original number of elements */
396 397 Py_ssize_t length; /* current number of elements */
397 398 PyObject *added; /* populated on demand */
398 399 PyObject *headrevs; /* cache, invalidated on changes */
399 400 nodetree *nt; /* base-16 trie */
400 401 int ntlength; /* # nodes in use */
401 402 int ntcapacity; /* # nodes allocated */
402 403 int ntdepth; /* maximum depth of tree */
403 404 int ntsplits; /* # splits performed */
404 405 int ntrev; /* last rev scanned */
405 406 int ntlookups; /* # lookups */
406 407 int ntmisses; /* # lookups that miss the cache */
407 408 int inlined;
408 409 } indexObject;
409 410
410 411 static Py_ssize_t index_length(const indexObject *self)
411 412 {
412 413 if (self->added == NULL)
413 414 return self->length;
414 415 return self->length + PyList_GET_SIZE(self->added);
415 416 }
416 417
417 418 static PyObject *nullentry;
418 419 static const char nullid[20];
419 420
420 421 static long inline_scan(indexObject *self, const char **offsets);
421 422
422 423 #if LONG_MAX == 0x7fffffffL
423 424 static char *tuple_format = "Kiiiiiis#";
424 425 #else
425 426 static char *tuple_format = "kiiiiiis#";
426 427 #endif
427 428
428 429 /* A RevlogNG v1 index entry is 64 bytes long. */
429 430 static const long v1_hdrsize = 64;
430 431
431 432 /*
432 433 * Return a pointer to the beginning of a RevlogNG record.
433 434 */
434 435 static const char *index_deref(indexObject *self, Py_ssize_t pos)
435 436 {
436 437 if (self->inlined && pos > 0) {
437 438 if (self->offsets == NULL) {
438 439 self->offsets = malloc(self->raw_length *
439 440 sizeof(*self->offsets));
440 441 if (self->offsets == NULL)
441 442 return (const char *)PyErr_NoMemory();
442 443 inline_scan(self, self->offsets);
443 444 }
444 445 return self->offsets[pos];
445 446 }
446 447
447 448 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
448 449 }
449 450
450 451 /*
451 452 * RevlogNG format (all in big endian, data may be inlined):
452 453 * 6 bytes: offset
453 454 * 2 bytes: flags
454 455 * 4 bytes: compressed length
455 456 * 4 bytes: uncompressed length
456 457 * 4 bytes: base revision
457 458 * 4 bytes: link revision
458 459 * 4 bytes: parent 1 revision
459 460 * 4 bytes: parent 2 revision
460 461 * 32 bytes: nodeid (only 20 bytes used)
461 462 */
462 463 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
463 464 {
464 465 uint64_t offset_flags;
465 466 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
466 467 const char *c_node_id;
467 468 const char *data;
468 469 Py_ssize_t length = index_length(self);
469 470 PyObject *entry;
470 471
471 472 if (pos < 0)
472 473 pos += length;
473 474
474 475 if (pos < 0 || pos >= length) {
475 476 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
476 477 return NULL;
477 478 }
478 479
479 480 if (pos == length - 1) {
480 481 Py_INCREF(nullentry);
481 482 return nullentry;
482 483 }
483 484
484 485 if (pos >= self->length - 1) {
485 486 PyObject *obj;
486 487 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
487 488 Py_INCREF(obj);
488 489 return obj;
489 490 }
490 491
491 492 if (self->cache) {
492 493 if (self->cache[pos]) {
493 494 Py_INCREF(self->cache[pos]);
494 495 return self->cache[pos];
495 496 }
496 497 } else {
497 498 self->cache = calloc(self->raw_length, sizeof(PyObject *));
498 499 if (self->cache == NULL)
499 500 return PyErr_NoMemory();
500 501 }
501 502
502 503 data = index_deref(self, pos);
503 504 if (data == NULL)
504 505 return NULL;
505 506
506 507 offset_flags = getbe32(data + 4);
507 508 if (pos == 0) /* mask out version number for the first entry */
508 509 offset_flags &= 0xFFFF;
509 510 else {
510 511 uint32_t offset_high = getbe32(data);
511 512 offset_flags |= ((uint64_t)offset_high) << 32;
512 513 }
513 514
514 515 comp_len = getbe32(data + 8);
515 516 uncomp_len = getbe32(data + 12);
516 517 base_rev = getbe32(data + 16);
517 518 link_rev = getbe32(data + 20);
518 519 parent_1 = getbe32(data + 24);
519 520 parent_2 = getbe32(data + 28);
520 521 c_node_id = data + 32;
521 522
522 523 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
523 524 uncomp_len, base_rev, link_rev,
524 525 parent_1, parent_2, c_node_id, 20);
525 526
526 527 if (entry)
527 528 PyObject_GC_UnTrack(entry);
528 529
529 530 self->cache[pos] = entry;
530 531 Py_INCREF(entry);
531 532
532 533 return entry;
533 534 }
534 535
535 536 /*
536 537 * Return the 20-byte SHA of the node corresponding to the given rev.
537 538 */
538 539 static const char *index_node(indexObject *self, Py_ssize_t pos)
539 540 {
540 541 Py_ssize_t length = index_length(self);
541 542 const char *data;
542 543
543 544 if (pos == length - 1 || pos == INT_MAX)
544 545 return nullid;
545 546
546 547 if (pos >= length)
547 548 return NULL;
548 549
549 550 if (pos >= self->length - 1) {
550 551 PyObject *tuple, *str;
551 552 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
552 553 str = PyTuple_GetItem(tuple, 7);
553 554 return str ? PyString_AS_STRING(str) : NULL;
554 555 }
555 556
556 557 data = index_deref(self, pos);
557 558 return data ? data + 32 : NULL;
558 559 }
559 560
560 561 static int nt_insert(indexObject *self, const char *node, int rev);
561 562
562 563 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
563 564 {
564 565 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
565 566 return -1;
566 567 if (*nodelen == 20)
567 568 return 0;
568 569 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
569 570 return -1;
570 571 }
571 572
572 573 static PyObject *index_insert(indexObject *self, PyObject *args)
573 574 {
574 575 PyObject *obj;
575 576 char *node;
576 577 long offset;
577 578 Py_ssize_t len, nodelen;
578 579
579 580 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
580 581 return NULL;
581 582
582 583 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
583 584 PyErr_SetString(PyExc_TypeError, "8-tuple required");
584 585 return NULL;
585 586 }
586 587
587 588 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
588 589 return NULL;
589 590
590 591 len = index_length(self);
591 592
592 593 if (offset < 0)
593 594 offset += len;
594 595
595 596 if (offset != len - 1) {
596 597 PyErr_SetString(PyExc_IndexError,
597 598 "insert only supported at index -1");
598 599 return NULL;
599 600 }
600 601
601 602 if (offset > INT_MAX) {
602 603 PyErr_SetString(PyExc_ValueError,
603 604 "currently only 2**31 revs supported");
604 605 return NULL;
605 606 }
606 607
607 608 if (self->added == NULL) {
608 609 self->added = PyList_New(0);
609 610 if (self->added == NULL)
610 611 return NULL;
611 612 }
612 613
613 614 if (PyList_Append(self->added, obj) == -1)
614 615 return NULL;
615 616
616 617 if (self->nt)
617 618 nt_insert(self, node, (int)offset);
618 619
619 620 Py_CLEAR(self->headrevs);
620 621 Py_RETURN_NONE;
621 622 }
622 623
623 624 static void _index_clearcaches(indexObject *self)
624 625 {
625 626 if (self->cache) {
626 627 Py_ssize_t i;
627 628
628 629 for (i = 0; i < self->raw_length; i++)
629 630 Py_CLEAR(self->cache[i]);
630 631 free(self->cache);
631 632 self->cache = NULL;
632 633 }
633 634 if (self->offsets) {
634 635 free(self->offsets);
635 636 self->offsets = NULL;
636 637 }
637 638 if (self->nt) {
638 639 free(self->nt);
639 640 self->nt = NULL;
640 641 }
641 642 Py_CLEAR(self->headrevs);
642 643 }
643 644
644 645 static PyObject *index_clearcaches(indexObject *self)
645 646 {
646 647 _index_clearcaches(self);
647 648 self->ntlength = self->ntcapacity = 0;
648 649 self->ntdepth = self->ntsplits = 0;
649 650 self->ntrev = -1;
650 651 self->ntlookups = self->ntmisses = 0;
651 652 Py_RETURN_NONE;
652 653 }
653 654
654 655 static PyObject *index_stats(indexObject *self)
655 656 {
656 657 PyObject *obj = PyDict_New();
657 658
658 659 if (obj == NULL)
659 660 return NULL;
660 661
661 662 #define istat(__n, __d) \
662 663 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
663 664 goto bail;
664 665
665 666 if (self->added) {
666 667 Py_ssize_t len = PyList_GET_SIZE(self->added);
667 668 if (PyDict_SetItemString(obj, "index entries added",
668 669 PyInt_FromSsize_t(len)) == -1)
669 670 goto bail;
670 671 }
671 672
672 673 if (self->raw_length != self->length - 1)
673 674 istat(raw_length, "revs on disk");
674 675 istat(length, "revs in memory");
675 676 istat(ntcapacity, "node trie capacity");
676 677 istat(ntdepth, "node trie depth");
677 678 istat(ntlength, "node trie count");
678 679 istat(ntlookups, "node trie lookups");
679 680 istat(ntmisses, "node trie misses");
680 681 istat(ntrev, "node trie last rev scanned");
681 682 istat(ntsplits, "node trie splits");
682 683
683 684 #undef istat
684 685
685 686 return obj;
686 687
687 688 bail:
688 689 Py_XDECREF(obj);
689 690 return NULL;
690 691 }
691 692
692 693 /*
693 694 * When we cache a list, we want to be sure the caller can't mutate
694 695 * the cached copy.
695 696 */
696 697 static PyObject *list_copy(PyObject *list)
697 698 {
698 699 Py_ssize_t len = PyList_GET_SIZE(list);
699 700 PyObject *newlist = PyList_New(len);
700 701 Py_ssize_t i;
701 702
702 703 if (newlist == NULL)
703 704 return NULL;
704 705
705 706 for (i = 0; i < len; i++) {
706 707 PyObject *obj = PyList_GET_ITEM(list, i);
707 708 Py_INCREF(obj);
708 709 PyList_SET_ITEM(newlist, i, obj);
709 710 }
710 711
711 712 return newlist;
712 713 }
713 714
714 715 static PyObject *index_headrevs(indexObject *self)
715 716 {
716 717 Py_ssize_t i, len, addlen;
717 718 char *nothead = NULL;
718 719 PyObject *heads;
719 720
720 721 if (self->headrevs)
721 722 return list_copy(self->headrevs);
722 723
723 724 len = index_length(self) - 1;
724 725 heads = PyList_New(0);
725 726 if (heads == NULL)
726 727 goto bail;
727 728 if (len == 0) {
728 729 PyObject *nullid = PyInt_FromLong(-1);
729 730 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
730 731 Py_XDECREF(nullid);
731 732 goto bail;
732 733 }
733 734 goto done;
734 735 }
735 736
736 737 nothead = calloc(len, 1);
737 738 if (nothead == NULL)
738 739 goto bail;
739 740
740 741 for (i = 0; i < self->raw_length; i++) {
741 742 const char *data = index_deref(self, i);
742 743 int parent_1 = getbe32(data + 24);
743 744 int parent_2 = getbe32(data + 28);
744 745 if (parent_1 >= 0)
745 746 nothead[parent_1] = 1;
746 747 if (parent_2 >= 0)
747 748 nothead[parent_2] = 1;
748 749 }
749 750
750 751 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
751 752
752 753 for (i = 0; i < addlen; i++) {
753 754 PyObject *rev = PyList_GET_ITEM(self->added, i);
754 755 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
755 756 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
756 757 long parent_1, parent_2;
757 758
758 759 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
759 760 PyErr_SetString(PyExc_TypeError,
760 761 "revlog parents are invalid");
761 762 goto bail;
762 763 }
763 764 parent_1 = PyInt_AS_LONG(p1);
764 765 parent_2 = PyInt_AS_LONG(p2);
765 766 if (parent_1 >= 0)
766 767 nothead[parent_1] = 1;
767 768 if (parent_2 >= 0)
768 769 nothead[parent_2] = 1;
769 770 }
770 771
771 772 for (i = 0; i < len; i++) {
772 773 PyObject *head;
773 774
774 775 if (nothead[i])
775 776 continue;
776 777 head = PyInt_FromLong(i);
777 778 if (head == NULL || PyList_Append(heads, head) == -1) {
778 779 Py_XDECREF(head);
779 780 goto bail;
780 781 }
781 782 }
782 783
783 784 done:
784 785 self->headrevs = heads;
785 786 free(nothead);
786 787 return list_copy(self->headrevs);
787 788 bail:
788 789 Py_XDECREF(heads);
789 790 free(nothead);
790 791 return NULL;
791 792 }
792 793
793 794 static inline int nt_level(const char *node, Py_ssize_t level)
794 795 {
795 796 int v = node[level>>1];
796 797 if (!(level & 1))
797 798 v >>= 4;
798 799 return v & 0xf;
799 800 }
800 801
801 802 /*
802 803 * Return values:
803 804 *
804 805 * -4: match is ambiguous (multiple candidates)
805 806 * -2: not found
806 807 * rest: valid rev
807 808 */
808 809 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
809 810 int hex)
810 811 {
811 812 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
812 813 int level, maxlevel, off;
813 814
814 815 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
815 816 return -1;
816 817
817 818 if (self->nt == NULL)
818 819 return -2;
819 820
820 821 if (hex)
821 822 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
822 823 else
823 824 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
824 825
825 826 for (level = off = 0; level < maxlevel; level++) {
826 827 int k = getnybble(node, level);
827 828 nodetree *n = &self->nt[off];
828 829 int v = n->children[k];
829 830
830 831 if (v < 0) {
831 832 const char *n;
832 833 Py_ssize_t i;
833 834
834 835 v = -v - 1;
835 836 n = index_node(self, v);
836 837 if (n == NULL)
837 838 return -2;
838 839 for (i = level; i < maxlevel; i++)
839 840 if (getnybble(node, i) != nt_level(n, i))
840 841 return -2;
841 842 return v;
842 843 }
843 844 if (v == 0)
844 845 return -2;
845 846 off = v;
846 847 }
847 848 /* multiple matches against an ambiguous prefix */
848 849 return -4;
849 850 }
850 851
851 852 static int nt_new(indexObject *self)
852 853 {
853 854 if (self->ntlength == self->ntcapacity) {
854 855 self->ntcapacity *= 2;
855 856 self->nt = realloc(self->nt,
856 857 self->ntcapacity * sizeof(nodetree));
857 858 if (self->nt == NULL) {
858 859 PyErr_SetString(PyExc_MemoryError, "out of memory");
859 860 return -1;
860 861 }
861 862 memset(&self->nt[self->ntlength], 0,
862 863 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
863 864 }
864 865 return self->ntlength++;
865 866 }
866 867
867 868 static int nt_insert(indexObject *self, const char *node, int rev)
868 869 {
869 870 int level = 0;
870 871 int off = 0;
871 872
872 873 while (level < 40) {
873 874 int k = nt_level(node, level);
874 875 nodetree *n;
875 876 int v;
876 877
877 878 n = &self->nt[off];
878 879 v = n->children[k];
879 880
880 881 if (v == 0) {
881 882 n->children[k] = -rev - 1;
882 883 return 0;
883 884 }
884 885 if (v < 0) {
885 886 const char *oldnode = index_node(self, -v - 1);
886 887 int noff;
887 888
888 889 if (!oldnode || !memcmp(oldnode, node, 20)) {
889 890 n->children[k] = -rev - 1;
890 891 return 0;
891 892 }
892 893 noff = nt_new(self);
893 894 if (noff == -1)
894 895 return -1;
895 896 /* self->nt may have been changed by realloc */
896 897 self->nt[off].children[k] = noff;
897 898 off = noff;
898 899 n = &self->nt[off];
899 900 n->children[nt_level(oldnode, ++level)] = v;
900 901 if (level > self->ntdepth)
901 902 self->ntdepth = level;
902 903 self->ntsplits += 1;
903 904 } else {
904 905 level += 1;
905 906 off = v;
906 907 }
907 908 }
908 909
909 910 return -1;
910 911 }
911 912
912 913 static int nt_init(indexObject *self)
913 914 {
914 915 if (self->nt == NULL) {
915 916 self->ntcapacity = self->raw_length < 4
916 917 ? 4 : self->raw_length / 2;
917 918 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
918 919 if (self->nt == NULL) {
919 920 PyErr_NoMemory();
920 921 return -1;
921 922 }
922 923 self->ntlength = 1;
923 924 self->ntrev = (int)index_length(self) - 1;
924 925 self->ntlookups = 1;
925 926 self->ntmisses = 0;
926 927 if (nt_insert(self, nullid, INT_MAX) == -1)
927 928 return -1;
928 929 }
929 930 return 0;
930 931 }
931 932
932 933 /*
933 934 * Return values:
934 935 *
935 936 * -3: error (exception set)
936 937 * -2: not found (no exception set)
937 938 * rest: valid rev
938 939 */
939 940 static int index_find_node(indexObject *self,
940 941 const char *node, Py_ssize_t nodelen)
941 942 {
942 943 int rev;
943 944
944 945 self->ntlookups++;
945 946 rev = nt_find(self, node, nodelen, 0);
946 947 if (rev >= -1)
947 948 return rev;
948 949
949 950 if (nt_init(self) == -1)
950 951 return -3;
951 952
952 953 /*
953 954 * For the first handful of lookups, we scan the entire index,
954 955 * and cache only the matching nodes. This optimizes for cases
955 956 * like "hg tip", where only a few nodes are accessed.
956 957 *
957 958 * After that, we cache every node we visit, using a single
958 959 * scan amortized over multiple lookups. This gives the best
959 960 * bulk performance, e.g. for "hg log".
960 961 */
961 962 if (self->ntmisses++ < 4) {
962 963 for (rev = self->ntrev - 1; rev >= 0; rev--) {
963 964 const char *n = index_node(self, rev);
964 965 if (n == NULL)
965 966 return -2;
966 967 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
967 968 if (nt_insert(self, n, rev) == -1)
968 969 return -3;
969 970 break;
970 971 }
971 972 }
972 973 } else {
973 974 for (rev = self->ntrev - 1; rev >= 0; rev--) {
974 975 const char *n = index_node(self, rev);
975 976 if (n == NULL) {
976 977 self->ntrev = rev + 1;
977 978 return -2;
978 979 }
979 980 if (nt_insert(self, n, rev) == -1) {
980 981 self->ntrev = rev + 1;
981 982 return -3;
982 983 }
983 984 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
984 985 break;
985 986 }
986 987 }
987 988 self->ntrev = rev;
988 989 }
989 990
990 991 if (rev >= 0)
991 992 return rev;
992 993 return -2;
993 994 }
994 995
995 996 static PyObject *raise_revlog_error(void)
996 997 {
997 998 static PyObject *errclass;
998 999 PyObject *mod = NULL, *errobj;
999 1000
1000 1001 if (errclass == NULL) {
1001 1002 PyObject *dict;
1002 1003
1003 1004 mod = PyImport_ImportModule("mercurial.error");
1004 1005 if (mod == NULL)
1005 1006 goto classfail;
1006 1007
1007 1008 dict = PyModule_GetDict(mod);
1008 1009 if (dict == NULL)
1009 1010 goto classfail;
1010 1011
1011 1012 errclass = PyDict_GetItemString(dict, "RevlogError");
1012 1013 if (errclass == NULL) {
1013 1014 PyErr_SetString(PyExc_SystemError,
1014 1015 "could not find RevlogError");
1015 1016 goto classfail;
1016 1017 }
1017 1018 Py_INCREF(errclass);
1018 1019 }
1019 1020
1020 1021 errobj = PyObject_CallFunction(errclass, NULL);
1021 1022 if (errobj == NULL)
1022 1023 return NULL;
1023 1024 PyErr_SetObject(errclass, errobj);
1024 1025 return errobj;
1025 1026
1026 1027 classfail:
1027 1028 Py_XDECREF(mod);
1028 1029 return NULL;
1029 1030 }
1030 1031
1031 1032 static PyObject *index_getitem(indexObject *self, PyObject *value)
1032 1033 {
1033 1034 char *node;
1034 1035 Py_ssize_t nodelen;
1035 1036 int rev;
1036 1037
1037 1038 if (PyInt_Check(value))
1038 1039 return index_get(self, PyInt_AS_LONG(value));
1039 1040
1040 1041 if (node_check(value, &node, &nodelen) == -1)
1041 1042 return NULL;
1042 1043 rev = index_find_node(self, node, nodelen);
1043 1044 if (rev >= -1)
1044 1045 return PyInt_FromLong(rev);
1045 1046 if (rev == -2)
1046 1047 raise_revlog_error();
1047 1048 return NULL;
1048 1049 }
1049 1050
1050 1051 static int nt_partialmatch(indexObject *self, const char *node,
1051 1052 Py_ssize_t nodelen)
1052 1053 {
1053 1054 int rev;
1054 1055
1055 1056 if (nt_init(self) == -1)
1056 1057 return -3;
1057 1058
1058 1059 if (self->ntrev > 0) {
1059 1060 /* ensure that the radix tree is fully populated */
1060 1061 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1061 1062 const char *n = index_node(self, rev);
1062 1063 if (n == NULL)
1063 1064 return -2;
1064 1065 if (nt_insert(self, n, rev) == -1)
1065 1066 return -3;
1066 1067 }
1067 1068 self->ntrev = rev;
1068 1069 }
1069 1070
1070 1071 return nt_find(self, node, nodelen, 1);
1071 1072 }
1072 1073
1073 1074 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1074 1075 {
1075 1076 const char *fullnode;
1076 1077 int nodelen;
1077 1078 char *node;
1078 1079 int rev, i;
1079 1080
1080 1081 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1081 1082 return NULL;
1082 1083
1083 1084 if (nodelen < 4) {
1084 1085 PyErr_SetString(PyExc_ValueError, "key too short");
1085 1086 return NULL;
1086 1087 }
1087 1088
1088 1089 if (nodelen > 40) {
1089 1090 PyErr_SetString(PyExc_ValueError, "key too long");
1090 1091 return NULL;
1091 1092 }
1092 1093
1093 1094 for (i = 0; i < nodelen; i++)
1094 1095 hexdigit(node, i);
1095 1096 if (PyErr_Occurred()) {
1096 1097 /* input contains non-hex characters */
1097 1098 PyErr_Clear();
1098 1099 Py_RETURN_NONE;
1099 1100 }
1100 1101
1101 1102 rev = nt_partialmatch(self, node, nodelen);
1102 1103
1103 1104 switch (rev) {
1104 1105 case -4:
1105 1106 raise_revlog_error();
1106 1107 case -3:
1107 1108 return NULL;
1108 1109 case -2:
1109 1110 Py_RETURN_NONE;
1110 1111 case -1:
1111 1112 return PyString_FromStringAndSize(nullid, 20);
1112 1113 }
1113 1114
1114 1115 fullnode = index_node(self, rev);
1115 1116 if (fullnode == NULL) {
1116 1117 PyErr_Format(PyExc_IndexError,
1117 1118 "could not access rev %d", rev);
1118 1119 return NULL;
1119 1120 }
1120 1121 return PyString_FromStringAndSize(fullnode, 20);
1121 1122 }
1122 1123
1123 1124 static PyObject *index_m_get(indexObject *self, PyObject *args)
1124 1125 {
1125 1126 Py_ssize_t nodelen;
1126 1127 PyObject *val;
1127 1128 char *node;
1128 1129 int rev;
1129 1130
1130 1131 if (!PyArg_ParseTuple(args, "O", &val))
1131 1132 return NULL;
1132 1133 if (node_check(val, &node, &nodelen) == -1)
1133 1134 return NULL;
1134 1135 rev = index_find_node(self, node, nodelen);
1135 1136 if (rev == -3)
1136 1137 return NULL;
1137 1138 if (rev == -2)
1138 1139 Py_RETURN_NONE;
1139 1140 return PyInt_FromLong(rev);
1140 1141 }
1141 1142
1142 1143 static int index_contains(indexObject *self, PyObject *value)
1143 1144 {
1144 1145 char *node;
1145 1146 Py_ssize_t nodelen;
1146 1147
1147 1148 if (PyInt_Check(value)) {
1148 1149 long rev = PyInt_AS_LONG(value);
1149 1150 return rev >= -1 && rev < index_length(self);
1150 1151 }
1151 1152
1152 1153 if (node_check(value, &node, &nodelen) == -1)
1153 1154 return -1;
1154 1155
1155 1156 switch (index_find_node(self, node, nodelen)) {
1156 1157 case -3:
1157 1158 return -1;
1158 1159 case -2:
1159 1160 return 0;
1160 1161 default:
1161 1162 return 1;
1162 1163 }
1163 1164 }
1164 1165
1165 1166 /*
1166 1167 * Invalidate any trie entries introduced by added revs.
1167 1168 */
1168 1169 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1169 1170 {
1170 1171 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1171 1172
1172 1173 for (i = start; i < len; i++) {
1173 1174 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1174 1175 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1175 1176
1176 1177 nt_insert(self, PyString_AS_STRING(node), -1);
1177 1178 }
1178 1179
1179 1180 if (start == 0)
1180 1181 Py_CLEAR(self->added);
1181 1182 }
1182 1183
1183 1184 /*
1184 1185 * Delete a numeric range of revs, which must be at the end of the
1185 1186 * range, but exclude the sentinel nullid entry.
1186 1187 */
1187 1188 static int index_slice_del(indexObject *self, PyObject *item)
1188 1189 {
1189 1190 Py_ssize_t start, stop, step, slicelength;
1190 1191 Py_ssize_t length = index_length(self);
1191 1192 int ret = 0;
1192 1193
1193 1194 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1194 1195 &start, &stop, &step, &slicelength) < 0)
1195 1196 return -1;
1196 1197
1197 1198 if (slicelength <= 0)
1198 1199 return 0;
1199 1200
1200 1201 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1201 1202 stop = start;
1202 1203
1203 1204 if (step < 0) {
1204 1205 stop = start + 1;
1205 1206 start = stop + step*(slicelength - 1) - 1;
1206 1207 step = -step;
1207 1208 }
1208 1209
1209 1210 if (step != 1) {
1210 1211 PyErr_SetString(PyExc_ValueError,
1211 1212 "revlog index delete requires step size of 1");
1212 1213 return -1;
1213 1214 }
1214 1215
1215 1216 if (stop != length - 1) {
1216 1217 PyErr_SetString(PyExc_IndexError,
1217 1218 "revlog index deletion indices are invalid");
1218 1219 return -1;
1219 1220 }
1220 1221
1221 1222 if (start < self->length - 1) {
1222 1223 if (self->nt) {
1223 1224 Py_ssize_t i;
1224 1225
1225 1226 for (i = start + 1; i < self->length - 1; i++) {
1226 1227 const char *node = index_node(self, i);
1227 1228
1228 1229 if (node)
1229 1230 nt_insert(self, node, -1);
1230 1231 }
1231 1232 if (self->added)
1232 1233 nt_invalidate_added(self, 0);
1233 1234 if (self->ntrev > start)
1234 1235 self->ntrev = (int)start;
1235 1236 }
1236 1237 self->length = start + 1;
1237 1238 if (start < self->raw_length) {
1238 1239 if (self->cache) {
1239 1240 Py_ssize_t i;
1240 1241 for (i = start; i < self->raw_length; i++)
1241 1242 Py_CLEAR(self->cache[i]);
1242 1243 }
1243 1244 self->raw_length = start;
1244 1245 }
1245 1246 goto done;
1246 1247 }
1247 1248
1248 1249 if (self->nt) {
1249 1250 nt_invalidate_added(self, start - self->length + 1);
1250 1251 if (self->ntrev > start)
1251 1252 self->ntrev = (int)start;
1252 1253 }
1253 1254 if (self->added)
1254 1255 ret = PyList_SetSlice(self->added, start - self->length + 1,
1255 1256 PyList_GET_SIZE(self->added), NULL);
1256 1257 done:
1257 1258 Py_CLEAR(self->headrevs);
1258 1259 return ret;
1259 1260 }
1260 1261
1261 1262 /*
1262 1263 * Supported ops:
1263 1264 *
1264 1265 * slice deletion
1265 1266 * string assignment (extend node->rev mapping)
1266 1267 * string deletion (shrink node->rev mapping)
1267 1268 */
1268 1269 static int index_assign_subscript(indexObject *self, PyObject *item,
1269 1270 PyObject *value)
1270 1271 {
1271 1272 char *node;
1272 1273 Py_ssize_t nodelen;
1273 1274 long rev;
1274 1275
1275 1276 if (PySlice_Check(item) && value == NULL)
1276 1277 return index_slice_del(self, item);
1277 1278
1278 1279 if (node_check(item, &node, &nodelen) == -1)
1279 1280 return -1;
1280 1281
1281 1282 if (value == NULL)
1282 1283 return self->nt ? nt_insert(self, node, -1) : 0;
1283 1284 rev = PyInt_AsLong(value);
1284 1285 if (rev > INT_MAX || rev < 0) {
1285 1286 if (!PyErr_Occurred())
1286 1287 PyErr_SetString(PyExc_ValueError, "rev out of range");
1287 1288 return -1;
1288 1289 }
1289 1290 return nt_insert(self, node, (int)rev);
1290 1291 }
1291 1292
1292 1293 /*
1293 1294 * Find all RevlogNG entries in an index that has inline data. Update
1294 1295 * the optional "offsets" table with those entries.
1295 1296 */
1296 1297 static long inline_scan(indexObject *self, const char **offsets)
1297 1298 {
1298 1299 const char *data = PyString_AS_STRING(self->data);
1299 1300 const char *end = data + PyString_GET_SIZE(self->data);
1300 1301 long incr = v1_hdrsize;
1301 1302 Py_ssize_t len = 0;
1302 1303
1303 1304 while (data + v1_hdrsize <= end) {
1304 1305 uint32_t comp_len;
1305 1306 const char *old_data;
1306 1307 /* 3rd element of header is length of compressed inline data */
1307 1308 comp_len = getbe32(data + 8);
1308 1309 incr = v1_hdrsize + comp_len;
1309 1310 if (incr < v1_hdrsize)
1310 1311 break;
1311 1312 if (offsets)
1312 1313 offsets[len] = data;
1313 1314 len++;
1314 1315 old_data = data;
1315 1316 data += incr;
1316 1317 if (data <= old_data)
1317 1318 break;
1318 1319 }
1319 1320
1320 1321 if (data != end && data + v1_hdrsize != end) {
1321 1322 if (!PyErr_Occurred())
1322 1323 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1323 1324 return -1;
1324 1325 }
1325 1326
1326 1327 return len;
1327 1328 }
1328 1329
1329 1330 static int index_init(indexObject *self, PyObject *args)
1330 1331 {
1331 1332 PyObject *data_obj, *inlined_obj;
1332 1333 Py_ssize_t size;
1333 1334
1334 1335 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1335 1336 return -1;
1336 1337 if (!PyString_Check(data_obj)) {
1337 1338 PyErr_SetString(PyExc_TypeError, "data is not a string");
1338 1339 return -1;
1339 1340 }
1340 1341 size = PyString_GET_SIZE(data_obj);
1341 1342
1342 1343 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1343 1344 self->data = data_obj;
1344 1345 self->cache = NULL;
1345 1346
1346 1347 self->added = NULL;
1347 1348 self->headrevs = NULL;
1348 1349 self->offsets = NULL;
1349 1350 self->nt = NULL;
1350 1351 self->ntlength = self->ntcapacity = 0;
1351 1352 self->ntdepth = self->ntsplits = 0;
1352 1353 self->ntlookups = self->ntmisses = 0;
1353 1354 self->ntrev = -1;
1354 1355 Py_INCREF(self->data);
1355 1356
1356 1357 if (self->inlined) {
1357 1358 long len = inline_scan(self, NULL);
1358 1359 if (len == -1)
1359 1360 goto bail;
1360 1361 self->raw_length = len;
1361 1362 self->length = len + 1;
1362 1363 } else {
1363 1364 if (size % v1_hdrsize) {
1364 1365 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1365 1366 goto bail;
1366 1367 }
1367 1368 self->raw_length = size / v1_hdrsize;
1368 1369 self->length = self->raw_length + 1;
1369 1370 }
1370 1371
1371 1372 return 0;
1372 1373 bail:
1373 1374 return -1;
1374 1375 }
1375 1376
1376 1377 static PyObject *index_nodemap(indexObject *self)
1377 1378 {
1378 1379 Py_INCREF(self);
1379 1380 return (PyObject *)self;
1380 1381 }
1381 1382
1382 1383 static void index_dealloc(indexObject *self)
1383 1384 {
1384 1385 _index_clearcaches(self);
1385 1386 Py_DECREF(self->data);
1386 1387 Py_XDECREF(self->added);
1387 1388 PyObject_Del(self);
1388 1389 }
1389 1390
1390 1391 static PySequenceMethods index_sequence_methods = {
1391 1392 (lenfunc)index_length, /* sq_length */
1392 1393 0, /* sq_concat */
1393 1394 0, /* sq_repeat */
1394 1395 (ssizeargfunc)index_get, /* sq_item */
1395 1396 0, /* sq_slice */
1396 1397 0, /* sq_ass_item */
1397 1398 0, /* sq_ass_slice */
1398 1399 (objobjproc)index_contains, /* sq_contains */
1399 1400 };
1400 1401
1401 1402 static PyMappingMethods index_mapping_methods = {
1402 1403 (lenfunc)index_length, /* mp_length */
1403 1404 (binaryfunc)index_getitem, /* mp_subscript */
1404 1405 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1405 1406 };
1406 1407
1407 1408 static PyMethodDef index_methods[] = {
1408 1409 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1409 1410 "clear the index caches"},
1410 1411 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1411 1412 "get an index entry"},
1412 1413 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1413 1414 "get head revisions"},
1414 1415 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1415 1416 "insert an index entry"},
1416 1417 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1417 1418 "match a potentially ambiguous node ID"},
1418 1419 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1419 1420 "stats for the index"},
1420 1421 {NULL} /* Sentinel */
1421 1422 };
1422 1423
1423 1424 static PyGetSetDef index_getset[] = {
1424 1425 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1425 1426 {NULL} /* Sentinel */
1426 1427 };
1427 1428
1428 1429 static PyTypeObject indexType = {
1429 1430 PyObject_HEAD_INIT(NULL)
1430 1431 0, /* ob_size */
1431 1432 "parsers.index", /* tp_name */
1432 1433 sizeof(indexObject), /* tp_basicsize */
1433 1434 0, /* tp_itemsize */
1434 1435 (destructor)index_dealloc, /* tp_dealloc */
1435 1436 0, /* tp_print */
1436 1437 0, /* tp_getattr */
1437 1438 0, /* tp_setattr */
1438 1439 0, /* tp_compare */
1439 1440 0, /* tp_repr */
1440 1441 0, /* tp_as_number */
1441 1442 &index_sequence_methods, /* tp_as_sequence */
1442 1443 &index_mapping_methods, /* tp_as_mapping */
1443 1444 0, /* tp_hash */
1444 1445 0, /* tp_call */
1445 1446 0, /* tp_str */
1446 1447 0, /* tp_getattro */
1447 1448 0, /* tp_setattro */
1448 1449 0, /* tp_as_buffer */
1449 1450 Py_TPFLAGS_DEFAULT, /* tp_flags */
1450 1451 "revlog index", /* tp_doc */
1451 1452 0, /* tp_traverse */
1452 1453 0, /* tp_clear */
1453 1454 0, /* tp_richcompare */
1454 1455 0, /* tp_weaklistoffset */
1455 1456 0, /* tp_iter */
1456 1457 0, /* tp_iternext */
1457 1458 index_methods, /* tp_methods */
1458 1459 0, /* tp_members */
1459 1460 index_getset, /* tp_getset */
1460 1461 0, /* tp_base */
1461 1462 0, /* tp_dict */
1462 1463 0, /* tp_descr_get */
1463 1464 0, /* tp_descr_set */
1464 1465 0, /* tp_dictoffset */
1465 1466 (initproc)index_init, /* tp_init */
1466 1467 0, /* tp_alloc */
1467 1468 };
1468 1469
1469 1470 /*
1470 1471 * returns a tuple of the form (index, index, cache) with elements as
1471 1472 * follows:
1472 1473 *
1473 1474 * index: an index object that lazily parses RevlogNG records
1474 1475 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1475 1476 *
1476 1477 * added complications are for backwards compatibility
1477 1478 */
1478 1479 static PyObject *parse_index2(PyObject *self, PyObject *args)
1479 1480 {
1480 1481 PyObject *tuple = NULL, *cache = NULL;
1481 1482 indexObject *idx;
1482 1483 int ret;
1483 1484
1484 1485 idx = PyObject_New(indexObject, &indexType);
1485 1486 if (idx == NULL)
1486 1487 goto bail;
1487 1488
1488 1489 ret = index_init(idx, args);
1489 1490 if (ret == -1)
1490 1491 goto bail;
1491 1492
1492 1493 if (idx->inlined) {
1493 1494 cache = Py_BuildValue("iO", 0, idx->data);
1494 1495 if (cache == NULL)
1495 1496 goto bail;
1496 1497 } else {
1497 1498 cache = Py_None;
1498 1499 Py_INCREF(cache);
1499 1500 }
1500 1501
1501 1502 tuple = Py_BuildValue("NN", idx, cache);
1502 1503 if (!tuple)
1503 1504 goto bail;
1504 1505 return tuple;
1505 1506
1506 1507 bail:
1507 1508 Py_XDECREF(idx);
1508 1509 Py_XDECREF(cache);
1509 1510 Py_XDECREF(tuple);
1510 1511 return NULL;
1511 1512 }
1512 1513
1513 1514 static char parsers_doc[] = "Efficient content parsing.";
1514 1515
1515 1516 PyObject *encodedir(PyObject *self, PyObject *args);
1516 1517 PyObject *pathencode(PyObject *self, PyObject *args);
1517 1518 PyObject *lowerencode(PyObject *self, PyObject *args);
1518 1519
1519 1520 static PyMethodDef methods[] = {
1520 1521 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1521 1522 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1522 1523 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1523 1524 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1524 1525 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
1525 1526 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
1526 1527 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
1527 1528 {NULL, NULL}
1528 1529 };
1529 1530
1530 1531 static void module_init(PyObject *mod)
1531 1532 {
1532 1533 indexType.tp_new = PyType_GenericNew;
1533 1534 if (PyType_Ready(&indexType) < 0)
1534 1535 return;
1535 1536 Py_INCREF(&indexType);
1536 1537
1537 1538 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1538 1539
1539 1540 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1540 1541 -1, -1, -1, -1, nullid, 20);
1541 1542 if (nullentry)
1542 1543 PyObject_GC_UnTrack(nullentry);
1543 1544
1544 1545 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1545 1546 }
1546 1547
1547 1548 #ifdef IS_PY3K
1548 1549 static struct PyModuleDef parsers_module = {
1549 1550 PyModuleDef_HEAD_INIT,
1550 1551 "parsers",
1551 1552 parsers_doc,
1552 1553 -1,
1553 1554 methods
1554 1555 };
1555 1556
1556 1557 PyMODINIT_FUNC PyInit_parsers(void)
1557 1558 {
1558 1559 PyObject *mod = PyModule_Create(&parsers_module);
1559 1560 module_init(mod);
1560 1561 return mod;
1561 1562 }
1562 1563 #else
1563 1564 PyMODINIT_FUNC initparsers(void)
1564 1565 {
1565 1566 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1566 1567 module_init(mod);
1567 1568 }
1568 1569 #endif
@@ -1,89 +1,115 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from mercurial.node import bin, nullid
9 9 from mercurial import util
10 import struct, zlib
10 import struct, zlib, cStringIO
11 11
12 12 _pack = struct.pack
13 13 _unpack = struct.unpack
14 14 _compress = zlib.compress
15 15 _decompress = zlib.decompress
16 16 _sha = util.sha1
17 17
18 18 def parse_manifest(mfdict, fdict, lines):
19 19 for l in lines.splitlines():
20 20 f, n = l.split('\0')
21 21 if len(n) > 40:
22 22 fdict[f] = n[40:]
23 23 mfdict[f] = bin(n[:40])
24 24 else:
25 25 mfdict[f] = bin(n)
26 26
27 27 def parse_index2(data, inline):
28 28 def gettype(q):
29 29 return int(q & 0xFFFF)
30 30
31 31 def offset_type(offset, type):
32 32 return long(long(offset) << 16 | type)
33 33
34 34 indexformatng = ">Qiiiiii20s12x"
35 35
36 36 s = struct.calcsize(indexformatng)
37 37 index = []
38 38 cache = None
39 39 off = 0
40 40
41 41 l = len(data) - s
42 42 append = index.append
43 43 if inline:
44 44 cache = (0, data)
45 45 while off <= l:
46 46 e = _unpack(indexformatng, data[off:off + s])
47 47 append(e)
48 48 if e[1] < 0:
49 49 break
50 50 off += e[1] + s
51 51 else:
52 52 while off <= l:
53 53 e = _unpack(indexformatng, data[off:off + s])
54 54 append(e)
55 55 off += s
56 56
57 57 if off != len(data):
58 58 raise ValueError('corrupt index file')
59 59
60 60 if index:
61 61 e = list(index[0])
62 62 type = gettype(e[0])
63 63 e[0] = offset_type(0, type)
64 64 index[0] = tuple(e)
65 65
66 66 # add the magic null revision at -1
67 67 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
68 68
69 69 return index, cache
70 70
71 71 def parse_dirstate(dmap, copymap, st):
72 72 parents = [st[:20], st[20: 40]]
73 73 # dereference fields so they will be local in loop
74 74 format = ">cllll"
75 75 e_size = struct.calcsize(format)
76 76 pos1 = 40
77 77 l = len(st)
78 78
79 79 # the inner loop
80 80 while pos1 < l:
81 81 pos2 = pos1 + e_size
82 82 e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
83 83 pos1 = pos2 + e[4]
84 84 f = st[pos2:pos1]
85 85 if '\0' in f:
86 86 f, c = f.split('\0')
87 87 copymap[f] = c
88 88 dmap[f] = e[:4]
89 89 return parents
90
91 def pack_dirstate(dmap, copymap, pl, now):
92 now = int(now)
93 cs = cStringIO.StringIO()
94 write = cs.write
95 write("".join(pl))
96 for f, e in dmap.iteritems():
97 if e[0] == 'n' and e[3] == now:
98 # The file was last modified "simultaneously" with the current
99 # write to dirstate (i.e. within the same second for file-
100 # systems with a granularity of 1 sec). This commonly happens
101 # for at least a couple of files on 'update'.
102 # The user could change the file without changing its size
103 # within the same second. Invalidate the file's stat data in
104 # dirstate, forcing future 'status' calls to compare the
105 # contents of the file. This prevents mistakenly treating such
106 # files as clean.
107 e = (e[0], 0, -1, -1) # mark entry as 'unset'
108 dmap[f] = e
109
110 if f in copymap:
111 f = "%s\0%s" % (f, copymap[f])
112 e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
113 write(e)
114 write(f)
115 return cs.getvalue()
General Comments 0
You need to be logged in to leave comments. Login now