##// END OF EJS Templates
parsers: add a C function to pack the dirstate...
Bryan O'Sullivan -
r16955:92e1c64b default
parent child Browse files
Show More
@@ -1,796 +1,805 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 import errno
8 8
9 9 from node import nullid
10 10 from i18n import _
11 11 import scmutil, util, ignore, osutil, parsers, encoding
12 12 import struct, os, stat, errno
13 13 import cStringIO
14 14
15 15 _format = ">cllll"
16 16 propertycache = util.propertycache
17 17 filecache = scmutil.filecache
18 18
19 19 class repocache(filecache):
20 20 """filecache for files in .hg/"""
21 21 def join(self, obj, fname):
22 22 return obj._opener.join(fname)
23 23
24 24 class rootcache(filecache):
25 25 """filecache for files in the repository root"""
26 26 def join(self, obj, fname):
27 27 return obj._join(fname)
28 28
29 29 def _finddirs(path):
30 30 pos = path.rfind('/')
31 31 while pos != -1:
32 32 yield path[:pos]
33 33 pos = path.rfind('/', 0, pos)
34 34
35 35 def _incdirs(dirs, path):
36 36 for base in _finddirs(path):
37 37 if base in dirs:
38 38 dirs[base] += 1
39 39 return
40 40 dirs[base] = 1
41 41
42 42 def _decdirs(dirs, path):
43 43 for base in _finddirs(path):
44 44 if dirs[base] > 1:
45 45 dirs[base] -= 1
46 46 return
47 47 del dirs[base]
48 48
49 49 class dirstate(object):
50 50
51 51 def __init__(self, opener, ui, root, validate):
52 52 '''Create a new dirstate object.
53 53
54 54 opener is an open()-like callable that can be used to open the
55 55 dirstate file; root is the root of the directory tracked by
56 56 the dirstate.
57 57 '''
58 58 self._opener = opener
59 59 self._validate = validate
60 60 self._root = root
61 61 self._rootdir = os.path.join(root, '')
62 62 self._dirty = False
63 63 self._dirtypl = False
64 64 self._lastnormaltime = 0
65 65 self._ui = ui
66 66 self._filecache = {}
67 67
68 68 @propertycache
69 69 def _map(self):
70 70 '''Return the dirstate contents as a map from filename to
71 71 (state, mode, size, time).'''
72 72 self._read()
73 73 return self._map
74 74
75 75 @propertycache
76 76 def _copymap(self):
77 77 self._read()
78 78 return self._copymap
79 79
80 80 @propertycache
81 81 def _foldmap(self):
82 82 f = {}
83 83 for name in self._map:
84 84 f[util.normcase(name)] = name
85 85 for name in self._dirs:
86 86 f[util.normcase(name)] = name
87 87 f['.'] = '.' # prevents useless util.fspath() invocation
88 88 return f
89 89
90 90 @repocache('branch')
91 91 def _branch(self):
92 92 try:
93 93 return self._opener.read("branch").strip() or "default"
94 94 except IOError, inst:
95 95 if inst.errno != errno.ENOENT:
96 96 raise
97 97 return "default"
98 98
99 99 @propertycache
100 100 def _pl(self):
101 101 try:
102 102 fp = self._opener("dirstate")
103 103 st = fp.read(40)
104 104 fp.close()
105 105 l = len(st)
106 106 if l == 40:
107 107 return st[:20], st[20:40]
108 108 elif l > 0 and l < 40:
109 109 raise util.Abort(_('working directory state appears damaged!'))
110 110 except IOError, err:
111 111 if err.errno != errno.ENOENT:
112 112 raise
113 113 return [nullid, nullid]
114 114
115 115 @propertycache
116 116 def _dirs(self):
117 117 dirs = {}
118 118 for f, s in self._map.iteritems():
119 119 if s[0] != 'r':
120 120 _incdirs(dirs, f)
121 121 return dirs
122 122
123 123 def dirs(self):
124 124 return self._dirs
125 125
126 126 @rootcache('.hgignore')
127 127 def _ignore(self):
128 128 files = [self._join('.hgignore')]
129 129 for name, path in self._ui.configitems("ui"):
130 130 if name == 'ignore' or name.startswith('ignore.'):
131 131 files.append(util.expandpath(path))
132 132 return ignore.ignore(self._root, files, self._ui.warn)
133 133
134 134 @propertycache
135 135 def _slash(self):
136 136 return self._ui.configbool('ui', 'slash') and os.sep != '/'
137 137
138 138 @propertycache
139 139 def _checklink(self):
140 140 return util.checklink(self._root)
141 141
142 142 @propertycache
143 143 def _checkexec(self):
144 144 return util.checkexec(self._root)
145 145
146 146 @propertycache
147 147 def _checkcase(self):
148 148 return not util.checkcase(self._join('.hg'))
149 149
150 150 def _join(self, f):
151 151 # much faster than os.path.join()
152 152 # it's safe because f is always a relative path
153 153 return self._rootdir + f
154 154
155 155 def flagfunc(self, buildfallback):
156 156 if self._checklink and self._checkexec:
157 157 def f(x):
158 158 p = self._join(x)
159 159 if os.path.islink(p):
160 160 return 'l'
161 161 if util.isexec(p):
162 162 return 'x'
163 163 return ''
164 164 return f
165 165
166 166 fallback = buildfallback()
167 167 if self._checklink:
168 168 def f(x):
169 169 if os.path.islink(self._join(x)):
170 170 return 'l'
171 171 if 'x' in fallback(x):
172 172 return 'x'
173 173 return ''
174 174 return f
175 175 if self._checkexec:
176 176 def f(x):
177 177 if 'l' in fallback(x):
178 178 return 'l'
179 179 if util.isexec(self._join(x)):
180 180 return 'x'
181 181 return ''
182 182 return f
183 183 else:
184 184 return fallback
185 185
186 186 def getcwd(self):
187 187 cwd = os.getcwd()
188 188 if cwd == self._root:
189 189 return ''
190 190 # self._root ends with a path separator if self._root is '/' or 'C:\'
191 191 rootsep = self._root
192 192 if not util.endswithsep(rootsep):
193 193 rootsep += os.sep
194 194 if cwd.startswith(rootsep):
195 195 return cwd[len(rootsep):]
196 196 else:
197 197 # we're outside the repo. return an absolute path.
198 198 return cwd
199 199
200 200 def pathto(self, f, cwd=None):
201 201 if cwd is None:
202 202 cwd = self.getcwd()
203 203 path = util.pathto(self._root, cwd, f)
204 204 if self._slash:
205 205 return util.normpath(path)
206 206 return path
207 207
208 208 def __getitem__(self, key):
209 209 '''Return the current state of key (a filename) in the dirstate.
210 210
211 211 States are:
212 212 n normal
213 213 m needs merging
214 214 r marked for removal
215 215 a marked for addition
216 216 ? not tracked
217 217 '''
218 218 return self._map.get(key, ("?",))[0]
219 219
220 220 def __contains__(self, key):
221 221 return key in self._map
222 222
223 223 def __iter__(self):
224 224 for x in sorted(self._map):
225 225 yield x
226 226
227 227 def parents(self):
228 228 return [self._validate(p) for p in self._pl]
229 229
230 230 def p1(self):
231 231 return self._validate(self._pl[0])
232 232
233 233 def p2(self):
234 234 return self._validate(self._pl[1])
235 235
236 236 def branch(self):
237 237 return encoding.tolocal(self._branch)
238 238
239 239 def setparents(self, p1, p2=nullid):
240 240 """Set dirstate parents to p1 and p2.
241 241
242 242 When moving from two parents to one, 'm' merged entries a
243 243 adjusted to normal and previous copy records discarded and
244 244 returned by the call.
245 245
246 246 See localrepo.setparents()
247 247 """
248 248 self._dirty = self._dirtypl = True
249 249 oldp2 = self._pl[1]
250 250 self._pl = p1, p2
251 251 copies = {}
252 252 if oldp2 != nullid and p2 == nullid:
253 253 # Discard 'm' markers when moving away from a merge state
254 254 for f, s in self._map.iteritems():
255 255 if s[0] == 'm':
256 256 if f in self._copymap:
257 257 copies[f] = self._copymap[f]
258 258 self.normallookup(f)
259 259 return copies
260 260
261 261 def setbranch(self, branch):
262 262 if branch in ['tip', '.', 'null']:
263 263 raise util.Abort(_('the name \'%s\' is reserved') % branch)
264 264 self._branch = encoding.fromlocal(branch)
265 265 f = self._opener('branch', 'w', atomictemp=True)
266 266 try:
267 267 f.write(self._branch + '\n')
268 268 finally:
269 269 f.close()
270 270
271 271 def _read(self):
272 272 self._map = {}
273 273 self._copymap = {}
274 274 try:
275 275 st = self._opener.read("dirstate")
276 276 except IOError, err:
277 277 if err.errno != errno.ENOENT:
278 278 raise
279 279 return
280 280 if not st:
281 281 return
282 282
283 283 p = parsers.parse_dirstate(self._map, self._copymap, st)
284 284 if not self._dirtypl:
285 285 self._pl = p
286 286
287 287 def invalidate(self):
288 288 for a in ("_map", "_copymap", "_foldmap", "_branch", "_pl", "_dirs",
289 289 "_ignore"):
290 290 if a in self.__dict__:
291 291 delattr(self, a)
292 292 self._lastnormaltime = 0
293 293 self._dirty = False
294 294
295 295 def copy(self, source, dest):
296 296 """Mark dest as a copy of source. Unmark dest if source is None."""
297 297 if source == dest:
298 298 return
299 299 self._dirty = True
300 300 if source is not None:
301 301 self._copymap[dest] = source
302 302 elif dest in self._copymap:
303 303 del self._copymap[dest]
304 304
305 305 def copied(self, file):
306 306 return self._copymap.get(file, None)
307 307
308 308 def copies(self):
309 309 return self._copymap
310 310
311 311 def _droppath(self, f):
312 312 if self[f] not in "?r" and "_dirs" in self.__dict__:
313 313 _decdirs(self._dirs, f)
314 314
315 315 def _addpath(self, f, check=False):
316 316 oldstate = self[f]
317 317 if check or oldstate == "r":
318 318 scmutil.checkfilename(f)
319 319 if f in self._dirs:
320 320 raise util.Abort(_('directory %r already in dirstate') % f)
321 321 # shadows
322 322 for d in _finddirs(f):
323 323 if d in self._dirs:
324 324 break
325 325 if d in self._map and self[d] != 'r':
326 326 raise util.Abort(
327 327 _('file %r in dirstate clashes with %r') % (d, f))
328 328 if oldstate in "?r" and "_dirs" in self.__dict__:
329 329 _incdirs(self._dirs, f)
330 330
331 331 def normal(self, f):
332 332 '''Mark a file normal and clean.'''
333 333 self._dirty = True
334 334 self._addpath(f)
335 335 s = os.lstat(self._join(f))
336 336 mtime = int(s.st_mtime)
337 337 self._map[f] = ('n', s.st_mode, s.st_size, mtime)
338 338 if f in self._copymap:
339 339 del self._copymap[f]
340 340 if mtime > self._lastnormaltime:
341 341 # Remember the most recent modification timeslot for status(),
342 342 # to make sure we won't miss future size-preserving file content
343 343 # modifications that happen within the same timeslot.
344 344 self._lastnormaltime = mtime
345 345
346 346 def normallookup(self, f):
347 347 '''Mark a file normal, but possibly dirty.'''
348 348 if self._pl[1] != nullid and f in self._map:
349 349 # if there is a merge going on and the file was either
350 350 # in state 'm' (-1) or coming from other parent (-2) before
351 351 # being removed, restore that state.
352 352 entry = self._map[f]
353 353 if entry[0] == 'r' and entry[2] in (-1, -2):
354 354 source = self._copymap.get(f)
355 355 if entry[2] == -1:
356 356 self.merge(f)
357 357 elif entry[2] == -2:
358 358 self.otherparent(f)
359 359 if source:
360 360 self.copy(source, f)
361 361 return
362 362 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
363 363 return
364 364 self._dirty = True
365 365 self._addpath(f)
366 366 self._map[f] = ('n', 0, -1, -1)
367 367 if f in self._copymap:
368 368 del self._copymap[f]
369 369
370 370 def otherparent(self, f):
371 371 '''Mark as coming from the other parent, always dirty.'''
372 372 if self._pl[1] == nullid:
373 373 raise util.Abort(_("setting %r to other parent "
374 374 "only allowed in merges") % f)
375 375 self._dirty = True
376 376 self._addpath(f)
377 377 self._map[f] = ('n', 0, -2, -1)
378 378 if f in self._copymap:
379 379 del self._copymap[f]
380 380
381 381 def add(self, f):
382 382 '''Mark a file added.'''
383 383 self._dirty = True
384 384 self._addpath(f, True)
385 385 self._map[f] = ('a', 0, -1, -1)
386 386 if f in self._copymap:
387 387 del self._copymap[f]
388 388
389 389 def remove(self, f):
390 390 '''Mark a file removed.'''
391 391 self._dirty = True
392 392 self._droppath(f)
393 393 size = 0
394 394 if self._pl[1] != nullid and f in self._map:
395 395 # backup the previous state
396 396 entry = self._map[f]
397 397 if entry[0] == 'm': # merge
398 398 size = -1
399 399 elif entry[0] == 'n' and entry[2] == -2: # other parent
400 400 size = -2
401 401 self._map[f] = ('r', 0, size, 0)
402 402 if size == 0 and f in self._copymap:
403 403 del self._copymap[f]
404 404
405 405 def merge(self, f):
406 406 '''Mark a file merged.'''
407 407 if self._pl[1] == nullid:
408 408 return self.normallookup(f)
409 409 self._dirty = True
410 410 s = os.lstat(self._join(f))
411 411 self._addpath(f)
412 412 self._map[f] = ('m', s.st_mode, s.st_size, int(s.st_mtime))
413 413 if f in self._copymap:
414 414 del self._copymap[f]
415 415
416 416 def drop(self, f):
417 417 '''Drop a file from the dirstate'''
418 418 if f in self._map:
419 419 self._dirty = True
420 420 self._droppath(f)
421 421 del self._map[f]
422 422
423 423 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
424 424 normed = util.normcase(path)
425 425 folded = self._foldmap.get(normed, None)
426 426 if folded is None:
427 427 if isknown:
428 428 folded = path
429 429 else:
430 430 if exists is None:
431 431 exists = os.path.lexists(os.path.join(self._root, path))
432 432 if not exists:
433 433 # Maybe a path component exists
434 434 if not ignoremissing and '/' in path:
435 435 d, f = path.rsplit('/', 1)
436 436 d = self._normalize(d, isknown, ignoremissing, None)
437 437 folded = d + "/" + f
438 438 else:
439 439 # No path components, preserve original case
440 440 folded = path
441 441 else:
442 442 # recursively normalize leading directory components
443 443 # against dirstate
444 444 if '/' in normed:
445 445 d, f = normed.rsplit('/', 1)
446 446 d = self._normalize(d, isknown, ignoremissing, True)
447 447 r = self._root + "/" + d
448 448 folded = d + "/" + util.fspath(f, r)
449 449 else:
450 450 folded = util.fspath(normed, self._root)
451 451 self._foldmap[normed] = folded
452 452
453 453 return folded
454 454
455 455 def normalize(self, path, isknown=False, ignoremissing=False):
456 456 '''
457 457 normalize the case of a pathname when on a casefolding filesystem
458 458
459 459 isknown specifies whether the filename came from walking the
460 460 disk, to avoid extra filesystem access.
461 461
462 462 If ignoremissing is True, missing path are returned
463 463 unchanged. Otherwise, we try harder to normalize possibly
464 464 existing path components.
465 465
466 466 The normalized case is determined based on the following precedence:
467 467
468 468 - version of name already stored in the dirstate
469 469 - version of name stored on disk
470 470 - version provided via command arguments
471 471 '''
472 472
473 473 if self._checkcase:
474 474 return self._normalize(path, isknown, ignoremissing)
475 475 return path
476 476
477 477 def clear(self):
478 478 self._map = {}
479 479 if "_dirs" in self.__dict__:
480 480 delattr(self, "_dirs")
481 481 self._copymap = {}
482 482 self._pl = [nullid, nullid]
483 483 self._lastnormaltime = 0
484 484 self._dirty = True
485 485
486 486 def rebuild(self, parent, files):
487 487 self.clear()
488 488 for f in files:
489 489 if 'x' in files.flags(f):
490 490 self._map[f] = ('n', 0777, -1, 0)
491 491 else:
492 492 self._map[f] = ('n', 0666, -1, 0)
493 493 self._pl = (parent, nullid)
494 494 self._dirty = True
495 495
496 496 def write(self):
497 497 if not self._dirty:
498 498 return
499 499 st = self._opener("dirstate", "w", atomictemp=True)
500 500
501 def finish(s):
502 st.write(s)
503 st.close()
504 self._lastnormaltime = 0
505 self._dirty = self._dirtypl = False
506
501 507 # use the modification time of the newly created temporary file as the
502 508 # filesystem's notion of 'now'
503 now = int(util.fstat(st).st_mtime)
509 now = util.fstat(st).st_mtime
510 copymap = self._copymap
511 try:
512 finish(parsers.pack_dirstate(self._map, copymap, self._pl, now))
513 return
514 except AttributeError:
515 pass
504 516
517 now = int(now)
505 518 cs = cStringIO.StringIO()
506 copymap = self._copymap
507 519 pack = struct.pack
508 520 write = cs.write
509 521 write("".join(self._pl))
510 522 for f, e in self._map.iteritems():
511 523 if e[0] == 'n' and e[3] == now:
512 524 # The file was last modified "simultaneously" with the current
513 525 # write to dirstate (i.e. within the same second for file-
514 526 # systems with a granularity of 1 sec). This commonly happens
515 527 # for at least a couple of files on 'update'.
516 528 # The user could change the file without changing its size
517 529 # within the same second. Invalidate the file's stat data in
518 530 # dirstate, forcing future 'status' calls to compare the
519 531 # contents of the file. This prevents mistakenly treating such
520 532 # files as clean.
521 533 e = (e[0], 0, -1, -1) # mark entry as 'unset'
522 534 self._map[f] = e
523 535
524 536 if f in copymap:
525 537 f = "%s\0%s" % (f, copymap[f])
526 538 e = pack(_format, e[0], e[1], e[2], e[3], len(f))
527 539 write(e)
528 540 write(f)
529 st.write(cs.getvalue())
530 st.close()
531 self._lastnormaltime = 0
532 self._dirty = self._dirtypl = False
541 finish(cs.getvalue())
533 542
534 543 def _dirignore(self, f):
535 544 if f == '.':
536 545 return False
537 546 if self._ignore(f):
538 547 return True
539 548 for p in _finddirs(f):
540 549 if self._ignore(p):
541 550 return True
542 551 return False
543 552
544 553 def walk(self, match, subrepos, unknown, ignored):
545 554 '''
546 555 Walk recursively through the directory tree, finding all files
547 556 matched by match.
548 557
549 558 Return a dict mapping filename to stat-like object (either
550 559 mercurial.osutil.stat instance or return value of os.stat()).
551 560 '''
552 561
553 562 def fwarn(f, msg):
554 563 self._ui.warn('%s: %s\n' % (self.pathto(f), msg))
555 564 return False
556 565
557 566 def badtype(mode):
558 567 kind = _('unknown')
559 568 if stat.S_ISCHR(mode):
560 569 kind = _('character device')
561 570 elif stat.S_ISBLK(mode):
562 571 kind = _('block device')
563 572 elif stat.S_ISFIFO(mode):
564 573 kind = _('fifo')
565 574 elif stat.S_ISSOCK(mode):
566 575 kind = _('socket')
567 576 elif stat.S_ISDIR(mode):
568 577 kind = _('directory')
569 578 return _('unsupported file type (type is %s)') % kind
570 579
571 580 ignore = self._ignore
572 581 dirignore = self._dirignore
573 582 if ignored:
574 583 ignore = util.never
575 584 dirignore = util.never
576 585 elif not unknown:
577 586 # if unknown and ignored are False, skip step 2
578 587 ignore = util.always
579 588 dirignore = util.always
580 589
581 590 matchfn = match.matchfn
582 591 badfn = match.bad
583 592 dmap = self._map
584 593 normpath = util.normpath
585 594 listdir = osutil.listdir
586 595 lstat = os.lstat
587 596 getkind = stat.S_IFMT
588 597 dirkind = stat.S_IFDIR
589 598 regkind = stat.S_IFREG
590 599 lnkkind = stat.S_IFLNK
591 600 join = self._join
592 601 work = []
593 602 wadd = work.append
594 603
595 604 exact = skipstep3 = False
596 605 if matchfn == match.exact: # match.exact
597 606 exact = True
598 607 dirignore = util.always # skip step 2
599 608 elif match.files() and not match.anypats(): # match.match, no patterns
600 609 skipstep3 = True
601 610
602 611 if not exact and self._checkcase:
603 612 normalize = self._normalize
604 613 skipstep3 = False
605 614 else:
606 615 normalize = lambda x, y, z: x
607 616
608 617 files = sorted(match.files())
609 618 subrepos.sort()
610 619 i, j = 0, 0
611 620 while i < len(files) and j < len(subrepos):
612 621 subpath = subrepos[j] + "/"
613 622 if files[i] < subpath:
614 623 i += 1
615 624 continue
616 625 while i < len(files) and files[i].startswith(subpath):
617 626 del files[i]
618 627 j += 1
619 628
620 629 if not files or '.' in files:
621 630 files = ['']
622 631 results = dict.fromkeys(subrepos)
623 632 results['.hg'] = None
624 633
625 634 # step 1: find all explicit files
626 635 for ff in files:
627 636 nf = normalize(normpath(ff), False, True)
628 637 if nf in results:
629 638 continue
630 639
631 640 try:
632 641 st = lstat(join(nf))
633 642 kind = getkind(st.st_mode)
634 643 if kind == dirkind:
635 644 skipstep3 = False
636 645 if nf in dmap:
637 646 #file deleted on disk but still in dirstate
638 647 results[nf] = None
639 648 match.dir(nf)
640 649 if not dirignore(nf):
641 650 wadd(nf)
642 651 elif kind == regkind or kind == lnkkind:
643 652 results[nf] = st
644 653 else:
645 654 badfn(ff, badtype(kind))
646 655 if nf in dmap:
647 656 results[nf] = None
648 657 except OSError, inst:
649 658 if nf in dmap: # does it exactly match a file?
650 659 results[nf] = None
651 660 else: # does it match a directory?
652 661 prefix = nf + "/"
653 662 for fn in dmap:
654 663 if fn.startswith(prefix):
655 664 match.dir(nf)
656 665 skipstep3 = False
657 666 break
658 667 else:
659 668 badfn(ff, inst.strerror)
660 669
661 670 # step 2: visit subdirectories
662 671 while work:
663 672 nd = work.pop()
664 673 skip = None
665 674 if nd == '.':
666 675 nd = ''
667 676 else:
668 677 skip = '.hg'
669 678 try:
670 679 entries = listdir(join(nd), stat=True, skip=skip)
671 680 except OSError, inst:
672 681 if inst.errno == errno.EACCES:
673 682 fwarn(nd, inst.strerror)
674 683 continue
675 684 raise
676 685 for f, kind, st in entries:
677 686 nf = normalize(nd and (nd + "/" + f) or f, True, True)
678 687 if nf not in results:
679 688 if kind == dirkind:
680 689 if not ignore(nf):
681 690 match.dir(nf)
682 691 wadd(nf)
683 692 if nf in dmap and matchfn(nf):
684 693 results[nf] = None
685 694 elif kind == regkind or kind == lnkkind:
686 695 if nf in dmap:
687 696 if matchfn(nf):
688 697 results[nf] = st
689 698 elif matchfn(nf) and not ignore(nf):
690 699 results[nf] = st
691 700 elif nf in dmap and matchfn(nf):
692 701 results[nf] = None
693 702
694 703 # step 3: report unseen items in the dmap hash
695 704 if not skipstep3 and not exact:
696 705 visit = sorted([f for f in dmap if f not in results and matchfn(f)])
697 706 for nf, st in zip(visit, util.statfiles([join(i) for i in visit])):
698 707 if (not st is None and
699 708 getkind(st.st_mode) not in (regkind, lnkkind)):
700 709 st = None
701 710 results[nf] = st
702 711 for s in subrepos:
703 712 del results[s]
704 713 del results['.hg']
705 714 return results
706 715
707 716 def status(self, match, subrepos, ignored, clean, unknown):
708 717 '''Determine the status of the working copy relative to the
709 718 dirstate and return a tuple of lists (unsure, modified, added,
710 719 removed, deleted, unknown, ignored, clean), where:
711 720
712 721 unsure:
713 722 files that might have been modified since the dirstate was
714 723 written, but need to be read to be sure (size is the same
715 724 but mtime differs)
716 725 modified:
717 726 files that have definitely been modified since the dirstate
718 727 was written (different size or mode)
719 728 added:
720 729 files that have been explicitly added with hg add
721 730 removed:
722 731 files that have been explicitly removed with hg remove
723 732 deleted:
724 733 files that have been deleted through other means ("missing")
725 734 unknown:
726 735 files not in the dirstate that are not ignored
727 736 ignored:
728 737 files not in the dirstate that are ignored
729 738 (by _dirignore())
730 739 clean:
731 740 files that have definitely not been modified since the
732 741 dirstate was written
733 742 '''
734 743 listignored, listclean, listunknown = ignored, clean, unknown
735 744 lookup, modified, added, unknown, ignored = [], [], [], [], []
736 745 removed, deleted, clean = [], [], []
737 746
738 747 dmap = self._map
739 748 ladd = lookup.append # aka "unsure"
740 749 madd = modified.append
741 750 aadd = added.append
742 751 uadd = unknown.append
743 752 iadd = ignored.append
744 753 radd = removed.append
745 754 dadd = deleted.append
746 755 cadd = clean.append
747 756
748 757 lnkkind = stat.S_IFLNK
749 758
750 759 for fn, st in self.walk(match, subrepos, listunknown,
751 760 listignored).iteritems():
752 761 if fn not in dmap:
753 762 if (listignored or match.exact(fn)) and self._dirignore(fn):
754 763 if listignored:
755 764 iadd(fn)
756 765 elif listunknown:
757 766 uadd(fn)
758 767 continue
759 768
760 769 state, mode, size, time = dmap[fn]
761 770
762 771 if not st and state in "nma":
763 772 dadd(fn)
764 773 elif state == 'n':
765 774 # The "mode & lnkkind != lnkkind or self._checklink"
766 775 # lines are an expansion of "islink => checklink"
767 776 # where islink means "is this a link?" and checklink
768 777 # means "can we check links?".
769 778 mtime = int(st.st_mtime)
770 779 if (size >= 0 and
771 780 (size != st.st_size
772 781 or ((mode ^ st.st_mode) & 0100 and self._checkexec))
773 782 and (mode & lnkkind != lnkkind or self._checklink)
774 783 or size == -2 # other parent
775 784 or fn in self._copymap):
776 785 madd(fn)
777 786 elif (mtime != time
778 787 and (mode & lnkkind != lnkkind or self._checklink)):
779 788 ladd(fn)
780 789 elif mtime == self._lastnormaltime:
781 790 # fn may have been changed in the same timeslot without
782 791 # changing its size. This can happen if we quickly do
783 792 # multiple commits in a single transaction.
784 793 # Force lookup, so we don't miss such a racy file change.
785 794 ladd(fn)
786 795 elif listclean:
787 796 cadd(fn)
788 797 elif state == 'm':
789 798 madd(fn)
790 799 elif state == 'a':
791 800 aadd(fn)
792 801 elif state == 'r':
793 802 radd(fn)
794 803
795 804 return (lookup, modified, added, removed, deleted, unknown, ignored,
796 805 clean)
@@ -1,1401 +1,1552 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <string.h>
13 13
14 14 #include "util.h"
15 15
16 16 static inline int hexdigit(const char *p, Py_ssize_t off)
17 17 {
18 18 char c = p[off];
19 19
20 20 if (c >= '0' && c <= '9')
21 21 return c - '0';
22 22 if (c >= 'a' && c <= 'f')
23 23 return c - 'a' + 10;
24 24 if (c >= 'A' && c <= 'F')
25 25 return c - 'A' + 10;
26 26
27 27 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
28 28 return 0;
29 29 }
30 30
31 31 /*
32 32 * Turn a hex-encoded string into binary.
33 33 */
34 34 static PyObject *unhexlify(const char *str, int len)
35 35 {
36 36 PyObject *ret;
37 37 char *d;
38 38 int i;
39 39
40 40 ret = PyBytes_FromStringAndSize(NULL, len / 2);
41 41
42 42 if (!ret)
43 43 return NULL;
44 44
45 45 d = PyBytes_AsString(ret);
46 46
47 47 for (i = 0; i < len;) {
48 48 int hi = hexdigit(str, i++);
49 49 int lo = hexdigit(str, i++);
50 50 *d++ = (hi << 4) | lo;
51 51 }
52 52
53 53 return ret;
54 54 }
55 55
56 56 /*
57 57 * This code assumes that a manifest is stitched together with newline
58 58 * ('\n') characters.
59 59 */
60 60 static PyObject *parse_manifest(PyObject *self, PyObject *args)
61 61 {
62 62 PyObject *mfdict, *fdict;
63 63 char *str, *cur, *start, *zero;
64 64 int len;
65 65
66 66 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
67 67 &PyDict_Type, &mfdict,
68 68 &PyDict_Type, &fdict,
69 69 &str, &len))
70 70 goto quit;
71 71
72 72 for (start = cur = str, zero = NULL; cur < str + len; cur++) {
73 73 PyObject *file = NULL, *node = NULL;
74 74 PyObject *flags = NULL;
75 75 int nlen;
76 76
77 77 if (!*cur) {
78 78 zero = cur;
79 79 continue;
80 80 }
81 81 else if (*cur != '\n')
82 82 continue;
83 83
84 84 if (!zero) {
85 85 PyErr_SetString(PyExc_ValueError,
86 86 "manifest entry has no separator");
87 87 goto quit;
88 88 }
89 89
90 90 file = PyBytes_FromStringAndSize(start, zero - start);
91 91
92 92 if (!file)
93 93 goto bail;
94 94
95 95 nlen = cur - zero - 1;
96 96
97 97 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
98 98 if (!node)
99 99 goto bail;
100 100
101 101 if (nlen > 40) {
102 102 flags = PyBytes_FromStringAndSize(zero + 41,
103 103 nlen - 40);
104 104 if (!flags)
105 105 goto bail;
106 106
107 107 if (PyDict_SetItem(fdict, file, flags) == -1)
108 108 goto bail;
109 109 }
110 110
111 111 if (PyDict_SetItem(mfdict, file, node) == -1)
112 112 goto bail;
113 113
114 114 start = cur + 1;
115 115 zero = NULL;
116 116
117 117 Py_XDECREF(flags);
118 118 Py_XDECREF(node);
119 119 Py_XDECREF(file);
120 120 continue;
121 121 bail:
122 122 Py_XDECREF(flags);
123 123 Py_XDECREF(node);
124 124 Py_XDECREF(file);
125 125 goto quit;
126 126 }
127 127
128 128 if (len > 0 && *(cur - 1) != '\n') {
129 129 PyErr_SetString(PyExc_ValueError,
130 130 "manifest contains trailing garbage");
131 131 goto quit;
132 132 }
133 133
134 134 Py_INCREF(Py_None);
135 135 return Py_None;
136 136 quit:
137 137 return NULL;
138 138 }
139 139
140 140 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
141 141 {
142 142 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
143 143 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
144 144 char *str, *cur, *end, *cpos;
145 145 int state, mode, size, mtime;
146 146 unsigned int flen;
147 147 int len;
148 148
149 149 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
150 150 &PyDict_Type, &dmap,
151 151 &PyDict_Type, &cmap,
152 152 &str, &len))
153 153 goto quit;
154 154
155 155 /* read parents */
156 156 if (len < 40)
157 157 goto quit;
158 158
159 159 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
160 160 if (!parents)
161 161 goto quit;
162 162
163 163 /* read filenames */
164 164 cur = str + 40;
165 165 end = str + len;
166 166
167 167 while (cur < end - 17) {
168 168 /* unpack header */
169 169 state = *cur;
170 170 mode = getbe32(cur + 1);
171 171 size = getbe32(cur + 5);
172 172 mtime = getbe32(cur + 9);
173 173 flen = getbe32(cur + 13);
174 174 cur += 17;
175 175 if (cur + flen > end || cur + flen < cur) {
176 176 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
177 177 goto quit;
178 178 }
179 179
180 180 entry = Py_BuildValue("ciii", state, mode, size, mtime);
181 181 if (!entry)
182 182 goto quit;
183 183 PyObject_GC_UnTrack(entry); /* don't waste time with this */
184 184
185 185 cpos = memchr(cur, 0, flen);
186 186 if (cpos) {
187 187 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
188 188 cname = PyBytes_FromStringAndSize(cpos + 1,
189 189 flen - (cpos - cur) - 1);
190 190 if (!fname || !cname ||
191 191 PyDict_SetItem(cmap, fname, cname) == -1 ||
192 192 PyDict_SetItem(dmap, fname, entry) == -1)
193 193 goto quit;
194 194 Py_DECREF(cname);
195 195 } else {
196 196 fname = PyBytes_FromStringAndSize(cur, flen);
197 197 if (!fname ||
198 198 PyDict_SetItem(dmap, fname, entry) == -1)
199 199 goto quit;
200 200 }
201 201 cur += flen;
202 202 Py_DECREF(fname);
203 203 Py_DECREF(entry);
204 204 fname = cname = entry = NULL;
205 205 }
206 206
207 207 ret = parents;
208 208 Py_INCREF(ret);
209 209 quit:
210 210 Py_XDECREF(fname);
211 211 Py_XDECREF(cname);
212 212 Py_XDECREF(entry);
213 213 Py_XDECREF(parents);
214 214 return ret;
215 215 }
216 216
217 static inline int getintat(PyObject *tuple, int off, uint32_t *v)
218 {
219 PyObject *o = PyTuple_GET_ITEM(tuple, off);
220 long val;
221
222 if (PyInt_Check(o))
223 val = PyInt_AS_LONG(o);
224 else if (PyLong_Check(o)) {
225 val = PyLong_AsLong(o);
226 if (val == -1 && PyErr_Occurred())
227 return -1;
228 } else {
229 PyErr_SetString(PyExc_TypeError, "expected an int or long");
230 return -1;
231 }
232 if (LONG_MAX > INT_MAX && (val > INT_MAX || val < INT_MIN)) {
233 PyErr_SetString(PyExc_OverflowError,
234 "Python value to large to convert to uint32_t");
235 return -1;
236 }
237 *v = (uint32_t)val;
238 return 0;
239 }
240
241 static PyObject *dirstate_unset;
242
243 /*
244 * Efficiently pack a dirstate object into its on-disk format.
245 */
246 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
247 {
248 PyObject *packobj = NULL;
249 PyObject *map, *copymap, *pl;
250 Py_ssize_t nbytes, pos, l;
251 PyObject *k, *v, *pn;
252 char *p, *s;
253 double now;
254
255 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
256 &PyDict_Type, &map, &PyDict_Type, &copymap,
257 &pl, &now))
258 return NULL;
259
260 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
261 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
262 return NULL;
263 }
264
265 /* Figure out how much we need to allocate. */
266 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
267 PyObject *c;
268 if (!PyString_Check(k)) {
269 PyErr_SetString(PyExc_TypeError, "expected string key");
270 goto bail;
271 }
272 nbytes += PyString_GET_SIZE(k) + 17;
273 c = PyDict_GetItem(copymap, k);
274 if (c) {
275 if (!PyString_Check(c)) {
276 PyErr_SetString(PyExc_TypeError,
277 "expected string key");
278 goto bail;
279 }
280 nbytes += PyString_GET_SIZE(c) + 1;
281 }
282 }
283
284 packobj = PyString_FromStringAndSize(NULL, nbytes);
285 if (packobj == NULL)
286 goto bail;
287
288 p = PyString_AS_STRING(packobj);
289
290 pn = PySequence_ITEM(pl, 0);
291 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
292 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
293 goto bail;
294 }
295 memcpy(p, s, l);
296 p += 20;
297 pn = PySequence_ITEM(pl, 1);
298 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
299 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
300 goto bail;
301 }
302 memcpy(p, s, l);
303 p += 20;
304
305 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
306 uint32_t mode, size, mtime;
307 Py_ssize_t len, l;
308 PyObject *o;
309 char *s, *t;
310 int err;
311
312 if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
313 PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
314 goto bail;
315 }
316 o = PyTuple_GET_ITEM(v, 0);
317 if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
318 PyErr_SetString(PyExc_TypeError, "expected one byte");
319 goto bail;
320 }
321 *p++ = *s;
322 err = getintat(v, 1, &mode);
323 err |= getintat(v, 2, &size);
324 err |= getintat(v, 3, &mtime);
325 if (err)
326 goto bail;
327 if (*s == 'n' && mtime == (uint32_t)now) {
328 /* See dirstate.py:write for why we do this. */
329 if (PyDict_SetItem(map, k, dirstate_unset) == -1)
330 goto bail;
331 mode = 0, size = -1, mtime = -1;
332 }
333 putbe32(mode, p);
334 putbe32(size, p + 4);
335 putbe32(mtime, p + 8);
336 t = p + 12;
337 p += 16;
338 len = PyString_GET_SIZE(k);
339 memcpy(p, PyString_AS_STRING(k), len);
340 p += len;
341 o = PyDict_GetItem(copymap, k);
342 if (o) {
343 *p++ = '\0';
344 l = PyString_GET_SIZE(o);
345 memcpy(p, PyString_AS_STRING(o), l);
346 p += l;
347 len += l + 1;
348 }
349 putbe32((uint32_t)len, t);
350 }
351
352 pos = p - PyString_AS_STRING(packobj);
353 if (pos != nbytes) {
354 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
355 (long)pos, (long)nbytes);
356 goto bail;
357 }
358
359 return packobj;
360 bail:
361 Py_XDECREF(packobj);
362 return NULL;
363 }
364
217 365 /*
218 366 * A base-16 trie for fast node->rev mapping.
219 367 *
220 368 * Positive value is index of the next node in the trie
221 369 * Negative value is a leaf: -(rev + 1)
222 370 * Zero is empty
223 371 */
224 372 typedef struct {
225 373 int children[16];
226 374 } nodetree;
227 375
228 376 /*
229 377 * This class has two behaviours.
230 378 *
231 379 * When used in a list-like way (with integer keys), we decode an
232 380 * entry in a RevlogNG index file on demand. Our last entry is a
233 381 * sentinel, always a nullid. We have limited support for
234 382 * integer-keyed insert and delete, only at elements right before the
235 383 * sentinel.
236 384 *
237 385 * With string keys, we lazily perform a reverse mapping from node to
238 386 * rev, using a base-16 trie.
239 387 */
240 388 typedef struct {
241 389 PyObject_HEAD
242 390 /* Type-specific fields go here. */
243 391 PyObject *data; /* raw bytes of index */
244 392 PyObject **cache; /* cached tuples */
245 393 const char **offsets; /* populated on demand */
246 394 Py_ssize_t raw_length; /* original number of elements */
247 395 Py_ssize_t length; /* current number of elements */
248 396 PyObject *added; /* populated on demand */
249 397 PyObject *headrevs; /* cache, invalidated on changes */
250 398 nodetree *nt; /* base-16 trie */
251 399 int ntlength; /* # nodes in use */
252 400 int ntcapacity; /* # nodes allocated */
253 401 int ntdepth; /* maximum depth of tree */
254 402 int ntsplits; /* # splits performed */
255 403 int ntrev; /* last rev scanned */
256 404 int ntlookups; /* # lookups */
257 405 int ntmisses; /* # lookups that miss the cache */
258 406 int inlined;
259 407 } indexObject;
260 408
261 409 static Py_ssize_t index_length(const indexObject *self)
262 410 {
263 411 if (self->added == NULL)
264 412 return self->length;
265 413 return self->length + PyList_GET_SIZE(self->added);
266 414 }
267 415
268 416 static PyObject *nullentry;
269 417 static const char nullid[20];
270 418
271 419 static long inline_scan(indexObject *self, const char **offsets);
272 420
273 421 #if LONG_MAX == 0x7fffffffL
274 422 static char *tuple_format = "Kiiiiiis#";
275 423 #else
276 424 static char *tuple_format = "kiiiiiis#";
277 425 #endif
278 426
279 427 /* A RevlogNG v1 index entry is 64 bytes long. */
280 428 static const long v1_hdrsize = 64;
281 429
282 430 /*
283 431 * Return a pointer to the beginning of a RevlogNG record.
284 432 */
285 433 static const char *index_deref(indexObject *self, Py_ssize_t pos)
286 434 {
287 435 if (self->inlined && pos > 0) {
288 436 if (self->offsets == NULL) {
289 437 self->offsets = malloc(self->raw_length *
290 438 sizeof(*self->offsets));
291 439 if (self->offsets == NULL)
292 440 return (const char *)PyErr_NoMemory();
293 441 inline_scan(self, self->offsets);
294 442 }
295 443 return self->offsets[pos];
296 444 }
297 445
298 446 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
299 447 }
300 448
301 449 /*
302 450 * RevlogNG format (all in big endian, data may be inlined):
303 451 * 6 bytes: offset
304 452 * 2 bytes: flags
305 453 * 4 bytes: compressed length
306 454 * 4 bytes: uncompressed length
307 455 * 4 bytes: base revision
308 456 * 4 bytes: link revision
309 457 * 4 bytes: parent 1 revision
310 458 * 4 bytes: parent 2 revision
311 459 * 32 bytes: nodeid (only 20 bytes used)
312 460 */
313 461 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
314 462 {
315 463 uint64_t offset_flags;
316 464 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
317 465 const char *c_node_id;
318 466 const char *data;
319 467 Py_ssize_t length = index_length(self);
320 468 PyObject *entry;
321 469
322 470 if (pos < 0)
323 471 pos += length;
324 472
325 473 if (pos < 0 || pos >= length) {
326 474 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
327 475 return NULL;
328 476 }
329 477
330 478 if (pos == length - 1) {
331 479 Py_INCREF(nullentry);
332 480 return nullentry;
333 481 }
334 482
335 483 if (pos >= self->length - 1) {
336 484 PyObject *obj;
337 485 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
338 486 Py_INCREF(obj);
339 487 return obj;
340 488 }
341 489
342 490 if (self->cache) {
343 491 if (self->cache[pos]) {
344 492 Py_INCREF(self->cache[pos]);
345 493 return self->cache[pos];
346 494 }
347 495 } else {
348 496 self->cache = calloc(self->raw_length, sizeof(PyObject *));
349 497 if (self->cache == NULL)
350 498 return PyErr_NoMemory();
351 499 }
352 500
353 501 data = index_deref(self, pos);
354 502 if (data == NULL)
355 503 return NULL;
356 504
357 505 offset_flags = getbe32(data + 4);
358 506 if (pos == 0) /* mask out version number for the first entry */
359 507 offset_flags &= 0xFFFF;
360 508 else {
361 509 uint32_t offset_high = getbe32(data);
362 510 offset_flags |= ((uint64_t)offset_high) << 32;
363 511 }
364 512
365 513 comp_len = getbe32(data + 8);
366 514 uncomp_len = getbe32(data + 12);
367 515 base_rev = getbe32(data + 16);
368 516 link_rev = getbe32(data + 20);
369 517 parent_1 = getbe32(data + 24);
370 518 parent_2 = getbe32(data + 28);
371 519 c_node_id = data + 32;
372 520
373 521 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
374 522 uncomp_len, base_rev, link_rev,
375 523 parent_1, parent_2, c_node_id, 20);
376 524
377 525 if (entry)
378 526 PyObject_GC_UnTrack(entry);
379 527
380 528 self->cache[pos] = entry;
381 529 Py_INCREF(entry);
382 530
383 531 return entry;
384 532 }
385 533
386 534 /*
387 535 * Return the 20-byte SHA of the node corresponding to the given rev.
388 536 */
389 537 static const char *index_node(indexObject *self, Py_ssize_t pos)
390 538 {
391 539 Py_ssize_t length = index_length(self);
392 540 const char *data;
393 541
394 542 if (pos == length - 1 || pos == INT_MAX)
395 543 return nullid;
396 544
397 545 if (pos >= length)
398 546 return NULL;
399 547
400 548 if (pos >= self->length - 1) {
401 549 PyObject *tuple, *str;
402 550 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
403 551 str = PyTuple_GetItem(tuple, 7);
404 552 return str ? PyString_AS_STRING(str) : NULL;
405 553 }
406 554
407 555 data = index_deref(self, pos);
408 556 return data ? data + 32 : NULL;
409 557 }
410 558
411 559 static int nt_insert(indexObject *self, const char *node, int rev);
412 560
413 561 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
414 562 {
415 563 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
416 564 return -1;
417 565 if (*nodelen == 20)
418 566 return 0;
419 567 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
420 568 return -1;
421 569 }
422 570
423 571 static PyObject *index_insert(indexObject *self, PyObject *args)
424 572 {
425 573 PyObject *obj;
426 574 char *node;
427 575 long offset;
428 576 Py_ssize_t len, nodelen;
429 577
430 578 if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
431 579 return NULL;
432 580
433 581 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
434 582 PyErr_SetString(PyExc_TypeError, "8-tuple required");
435 583 return NULL;
436 584 }
437 585
438 586 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
439 587 return NULL;
440 588
441 589 len = index_length(self);
442 590
443 591 if (offset < 0)
444 592 offset += len;
445 593
446 594 if (offset != len - 1) {
447 595 PyErr_SetString(PyExc_IndexError,
448 596 "insert only supported at index -1");
449 597 return NULL;
450 598 }
451 599
452 600 if (offset > INT_MAX) {
453 601 PyErr_SetString(PyExc_ValueError,
454 602 "currently only 2**31 revs supported");
455 603 return NULL;
456 604 }
457 605
458 606 if (self->added == NULL) {
459 607 self->added = PyList_New(0);
460 608 if (self->added == NULL)
461 609 return NULL;
462 610 }
463 611
464 612 if (PyList_Append(self->added, obj) == -1)
465 613 return NULL;
466 614
467 615 if (self->nt)
468 616 nt_insert(self, node, (int)offset);
469 617
470 618 Py_CLEAR(self->headrevs);
471 619 Py_RETURN_NONE;
472 620 }
473 621
474 622 static void _index_clearcaches(indexObject *self)
475 623 {
476 624 if (self->cache) {
477 625 Py_ssize_t i;
478 626
479 627 for (i = 0; i < self->raw_length; i++)
480 628 Py_CLEAR(self->cache[i]);
481 629 free(self->cache);
482 630 self->cache = NULL;
483 631 }
484 632 if (self->offsets) {
485 633 free(self->offsets);
486 634 self->offsets = NULL;
487 635 }
488 636 if (self->nt) {
489 637 free(self->nt);
490 638 self->nt = NULL;
491 639 }
492 640 Py_CLEAR(self->headrevs);
493 641 }
494 642
495 643 static PyObject *index_clearcaches(indexObject *self)
496 644 {
497 645 _index_clearcaches(self);
498 646 self->ntlength = self->ntcapacity = 0;
499 647 self->ntdepth = self->ntsplits = 0;
500 648 self->ntrev = -1;
501 649 self->ntlookups = self->ntmisses = 0;
502 650 Py_RETURN_NONE;
503 651 }
504 652
505 653 static PyObject *index_stats(indexObject *self)
506 654 {
507 655 PyObject *obj = PyDict_New();
508 656
509 657 if (obj == NULL)
510 658 return NULL;
511 659
512 660 #define istat(__n, __d) \
513 661 if (PyDict_SetItemString(obj, __d, PyInt_FromSsize_t(self->__n)) == -1) \
514 662 goto bail;
515 663
516 664 if (self->added) {
517 665 Py_ssize_t len = PyList_GET_SIZE(self->added);
518 666 if (PyDict_SetItemString(obj, "index entries added",
519 667 PyInt_FromSsize_t(len)) == -1)
520 668 goto bail;
521 669 }
522 670
523 671 if (self->raw_length != self->length - 1)
524 672 istat(raw_length, "revs on disk");
525 673 istat(length, "revs in memory");
526 674 istat(ntcapacity, "node trie capacity");
527 675 istat(ntdepth, "node trie depth");
528 676 istat(ntlength, "node trie count");
529 677 istat(ntlookups, "node trie lookups");
530 678 istat(ntmisses, "node trie misses");
531 679 istat(ntrev, "node trie last rev scanned");
532 680 istat(ntsplits, "node trie splits");
533 681
534 682 #undef istat
535 683
536 684 return obj;
537 685
538 686 bail:
539 687 Py_XDECREF(obj);
540 688 return NULL;
541 689 }
542 690
543 691 /*
544 692 * When we cache a list, we want to be sure the caller can't mutate
545 693 * the cached copy.
546 694 */
547 695 static PyObject *list_copy(PyObject *list)
548 696 {
549 697 Py_ssize_t len = PyList_GET_SIZE(list);
550 698 PyObject *newlist = PyList_New(len);
551 699 Py_ssize_t i;
552 700
553 701 if (newlist == NULL)
554 702 return NULL;
555 703
556 704 for (i = 0; i < len; i++) {
557 705 PyObject *obj = PyList_GET_ITEM(list, i);
558 706 Py_INCREF(obj);
559 707 PyList_SET_ITEM(newlist, i, obj);
560 708 }
561 709
562 710 return newlist;
563 711 }
564 712
565 713 static PyObject *index_headrevs(indexObject *self)
566 714 {
567 715 Py_ssize_t i, len, addlen;
568 716 char *nothead = NULL;
569 717 PyObject *heads;
570 718
571 719 if (self->headrevs)
572 720 return list_copy(self->headrevs);
573 721
574 722 len = index_length(self) - 1;
575 723 heads = PyList_New(0);
576 724 if (heads == NULL)
577 725 goto bail;
578 726 if (len == 0) {
579 727 PyObject *nullid = PyInt_FromLong(-1);
580 728 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
581 729 Py_XDECREF(nullid);
582 730 goto bail;
583 731 }
584 732 goto done;
585 733 }
586 734
587 735 nothead = calloc(len, 1);
588 736 if (nothead == NULL)
589 737 goto bail;
590 738
591 739 for (i = 0; i < self->raw_length; i++) {
592 740 const char *data = index_deref(self, i);
593 741 int parent_1 = getbe32(data + 24);
594 742 int parent_2 = getbe32(data + 28);
595 743 if (parent_1 >= 0)
596 744 nothead[parent_1] = 1;
597 745 if (parent_2 >= 0)
598 746 nothead[parent_2] = 1;
599 747 }
600 748
601 749 addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
602 750
603 751 for (i = 0; i < addlen; i++) {
604 752 PyObject *rev = PyList_GET_ITEM(self->added, i);
605 753 PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
606 754 PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
607 755 long parent_1, parent_2;
608 756
609 757 if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
610 758 PyErr_SetString(PyExc_TypeError,
611 759 "revlog parents are invalid");
612 760 goto bail;
613 761 }
614 762 parent_1 = PyInt_AS_LONG(p1);
615 763 parent_2 = PyInt_AS_LONG(p2);
616 764 if (parent_1 >= 0)
617 765 nothead[parent_1] = 1;
618 766 if (parent_2 >= 0)
619 767 nothead[parent_2] = 1;
620 768 }
621 769
622 770 for (i = 0; i < len; i++) {
623 771 PyObject *head;
624 772
625 773 if (nothead[i])
626 774 continue;
627 775 head = PyInt_FromLong(i);
628 776 if (head == NULL || PyList_Append(heads, head) == -1) {
629 777 Py_XDECREF(head);
630 778 goto bail;
631 779 }
632 780 }
633 781
634 782 done:
635 783 self->headrevs = heads;
636 784 free(nothead);
637 785 return list_copy(self->headrevs);
638 786 bail:
639 787 Py_XDECREF(heads);
640 788 free(nothead);
641 789 return NULL;
642 790 }
643 791
644 792 static inline int nt_level(const char *node, Py_ssize_t level)
645 793 {
646 794 int v = node[level>>1];
647 795 if (!(level & 1))
648 796 v >>= 4;
649 797 return v & 0xf;
650 798 }
651 799
652 800 /*
653 801 * Return values:
654 802 *
655 803 * -4: match is ambiguous (multiple candidates)
656 804 * -2: not found
657 805 * rest: valid rev
658 806 */
659 807 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
660 808 int hex)
661 809 {
662 810 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
663 811 int level, maxlevel, off;
664 812
665 813 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
666 814 return -1;
667 815
668 816 if (self->nt == NULL)
669 817 return -2;
670 818
671 819 if (hex)
672 820 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
673 821 else
674 822 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
675 823
676 824 for (level = off = 0; level < maxlevel; level++) {
677 825 int k = getnybble(node, level);
678 826 nodetree *n = &self->nt[off];
679 827 int v = n->children[k];
680 828
681 829 if (v < 0) {
682 830 const char *n;
683 831 Py_ssize_t i;
684 832
685 833 v = -v - 1;
686 834 n = index_node(self, v);
687 835 if (n == NULL)
688 836 return -2;
689 837 for (i = level; i < maxlevel; i++)
690 838 if (getnybble(node, i) != nt_level(n, i))
691 839 return -2;
692 840 return v;
693 841 }
694 842 if (v == 0)
695 843 return -2;
696 844 off = v;
697 845 }
698 846 /* multiple matches against an ambiguous prefix */
699 847 return -4;
700 848 }
701 849
702 850 static int nt_new(indexObject *self)
703 851 {
704 852 if (self->ntlength == self->ntcapacity) {
705 853 self->ntcapacity *= 2;
706 854 self->nt = realloc(self->nt,
707 855 self->ntcapacity * sizeof(nodetree));
708 856 if (self->nt == NULL) {
709 857 PyErr_SetString(PyExc_MemoryError, "out of memory");
710 858 return -1;
711 859 }
712 860 memset(&self->nt[self->ntlength], 0,
713 861 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
714 862 }
715 863 return self->ntlength++;
716 864 }
717 865
718 866 static int nt_insert(indexObject *self, const char *node, int rev)
719 867 {
720 868 int level = 0;
721 869 int off = 0;
722 870
723 871 while (level < 40) {
724 872 int k = nt_level(node, level);
725 873 nodetree *n;
726 874 int v;
727 875
728 876 n = &self->nt[off];
729 877 v = n->children[k];
730 878
731 879 if (v == 0) {
732 880 n->children[k] = -rev - 1;
733 881 return 0;
734 882 }
735 883 if (v < 0) {
736 884 const char *oldnode = index_node(self, -v - 1);
737 885 int noff;
738 886
739 887 if (!oldnode || !memcmp(oldnode, node, 20)) {
740 888 n->children[k] = -rev - 1;
741 889 return 0;
742 890 }
743 891 noff = nt_new(self);
744 892 if (noff == -1)
745 893 return -1;
746 894 /* self->nt may have been changed by realloc */
747 895 self->nt[off].children[k] = noff;
748 896 off = noff;
749 897 n = &self->nt[off];
750 898 n->children[nt_level(oldnode, ++level)] = v;
751 899 if (level > self->ntdepth)
752 900 self->ntdepth = level;
753 901 self->ntsplits += 1;
754 902 } else {
755 903 level += 1;
756 904 off = v;
757 905 }
758 906 }
759 907
760 908 return -1;
761 909 }
762 910
763 911 static int nt_init(indexObject *self)
764 912 {
765 913 if (self->nt == NULL) {
766 914 self->ntcapacity = self->raw_length < 4
767 915 ? 4 : self->raw_length / 2;
768 916 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
769 917 if (self->nt == NULL) {
770 918 PyErr_NoMemory();
771 919 return -1;
772 920 }
773 921 self->ntlength = 1;
774 922 self->ntrev = (int)index_length(self) - 1;
775 923 self->ntlookups = 1;
776 924 self->ntmisses = 0;
777 925 if (nt_insert(self, nullid, INT_MAX) == -1)
778 926 return -1;
779 927 }
780 928 return 0;
781 929 }
782 930
783 931 /*
784 932 * Return values:
785 933 *
786 934 * -3: error (exception set)
787 935 * -2: not found (no exception set)
788 936 * rest: valid rev
789 937 */
790 938 static int index_find_node(indexObject *self,
791 939 const char *node, Py_ssize_t nodelen)
792 940 {
793 941 int rev;
794 942
795 943 self->ntlookups++;
796 944 rev = nt_find(self, node, nodelen, 0);
797 945 if (rev >= -1)
798 946 return rev;
799 947
800 948 if (nt_init(self) == -1)
801 949 return -3;
802 950
803 951 /*
804 952 * For the first handful of lookups, we scan the entire index,
805 953 * and cache only the matching nodes. This optimizes for cases
806 954 * like "hg tip", where only a few nodes are accessed.
807 955 *
808 956 * After that, we cache every node we visit, using a single
809 957 * scan amortized over multiple lookups. This gives the best
810 958 * bulk performance, e.g. for "hg log".
811 959 */
812 960 if (self->ntmisses++ < 4) {
813 961 for (rev = self->ntrev - 1; rev >= 0; rev--) {
814 962 const char *n = index_node(self, rev);
815 963 if (n == NULL)
816 964 return -2;
817 965 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
818 966 if (nt_insert(self, n, rev) == -1)
819 967 return -3;
820 968 break;
821 969 }
822 970 }
823 971 } else {
824 972 for (rev = self->ntrev - 1; rev >= 0; rev--) {
825 973 const char *n = index_node(self, rev);
826 974 if (n == NULL) {
827 975 self->ntrev = rev + 1;
828 976 return -2;
829 977 }
830 978 if (nt_insert(self, n, rev) == -1) {
831 979 self->ntrev = rev + 1;
832 980 return -3;
833 981 }
834 982 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
835 983 break;
836 984 }
837 985 }
838 986 self->ntrev = rev;
839 987 }
840 988
841 989 if (rev >= 0)
842 990 return rev;
843 991 return -2;
844 992 }
845 993
846 994 static PyObject *raise_revlog_error(void)
847 995 {
848 996 static PyObject *errclass;
849 997 PyObject *mod = NULL, *errobj;
850 998
851 999 if (errclass == NULL) {
852 1000 PyObject *dict;
853 1001
854 1002 mod = PyImport_ImportModule("mercurial.error");
855 1003 if (mod == NULL)
856 1004 goto classfail;
857 1005
858 1006 dict = PyModule_GetDict(mod);
859 1007 if (dict == NULL)
860 1008 goto classfail;
861 1009
862 1010 errclass = PyDict_GetItemString(dict, "RevlogError");
863 1011 if (errclass == NULL) {
864 1012 PyErr_SetString(PyExc_SystemError,
865 1013 "could not find RevlogError");
866 1014 goto classfail;
867 1015 }
868 1016 Py_INCREF(errclass);
869 1017 }
870 1018
871 1019 errobj = PyObject_CallFunction(errclass, NULL);
872 1020 if (errobj == NULL)
873 1021 return NULL;
874 1022 PyErr_SetObject(errclass, errobj);
875 1023 return errobj;
876 1024
877 1025 classfail:
878 1026 Py_XDECREF(mod);
879 1027 return NULL;
880 1028 }
881 1029
882 1030 static PyObject *index_getitem(indexObject *self, PyObject *value)
883 1031 {
884 1032 char *node;
885 1033 Py_ssize_t nodelen;
886 1034 int rev;
887 1035
888 1036 if (PyInt_Check(value))
889 1037 return index_get(self, PyInt_AS_LONG(value));
890 1038
891 1039 if (node_check(value, &node, &nodelen) == -1)
892 1040 return NULL;
893 1041 rev = index_find_node(self, node, nodelen);
894 1042 if (rev >= -1)
895 1043 return PyInt_FromLong(rev);
896 1044 if (rev == -2)
897 1045 raise_revlog_error();
898 1046 return NULL;
899 1047 }
900 1048
901 1049 static int nt_partialmatch(indexObject *self, const char *node,
902 1050 Py_ssize_t nodelen)
903 1051 {
904 1052 int rev;
905 1053
906 1054 if (nt_init(self) == -1)
907 1055 return -3;
908 1056
909 1057 if (self->ntrev > 0) {
910 1058 /* ensure that the radix tree is fully populated */
911 1059 for (rev = self->ntrev - 1; rev >= 0; rev--) {
912 1060 const char *n = index_node(self, rev);
913 1061 if (n == NULL)
914 1062 return -2;
915 1063 if (nt_insert(self, n, rev) == -1)
916 1064 return -3;
917 1065 }
918 1066 self->ntrev = rev;
919 1067 }
920 1068
921 1069 return nt_find(self, node, nodelen, 1);
922 1070 }
923 1071
924 1072 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
925 1073 {
926 1074 const char *fullnode;
927 1075 int nodelen;
928 1076 char *node;
929 1077 int rev, i;
930 1078
931 1079 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
932 1080 return NULL;
933 1081
934 1082 if (nodelen < 4) {
935 1083 PyErr_SetString(PyExc_ValueError, "key too short");
936 1084 return NULL;
937 1085 }
938 1086
939 1087 if (nodelen > 40)
940 1088 nodelen = 40;
941 1089
942 1090 for (i = 0; i < nodelen; i++)
943 1091 hexdigit(node, i);
944 1092 if (PyErr_Occurred()) {
945 1093 /* input contains non-hex characters */
946 1094 PyErr_Clear();
947 1095 Py_RETURN_NONE;
948 1096 }
949 1097
950 1098 rev = nt_partialmatch(self, node, nodelen);
951 1099
952 1100 switch (rev) {
953 1101 case -4:
954 1102 raise_revlog_error();
955 1103 case -3:
956 1104 return NULL;
957 1105 case -2:
958 1106 Py_RETURN_NONE;
959 1107 case -1:
960 1108 return PyString_FromStringAndSize(nullid, 20);
961 1109 }
962 1110
963 1111 fullnode = index_node(self, rev);
964 1112 if (fullnode == NULL) {
965 1113 PyErr_Format(PyExc_IndexError,
966 1114 "could not access rev %d", rev);
967 1115 return NULL;
968 1116 }
969 1117 return PyString_FromStringAndSize(fullnode, 20);
970 1118 }
971 1119
972 1120 static PyObject *index_m_get(indexObject *self, PyObject *args)
973 1121 {
974 1122 Py_ssize_t nodelen;
975 1123 PyObject *val;
976 1124 char *node;
977 1125 int rev;
978 1126
979 1127 if (!PyArg_ParseTuple(args, "O", &val))
980 1128 return NULL;
981 1129 if (node_check(val, &node, &nodelen) == -1)
982 1130 return NULL;
983 1131 rev = index_find_node(self, node, nodelen);
984 1132 if (rev == -3)
985 1133 return NULL;
986 1134 if (rev == -2)
987 1135 Py_RETURN_NONE;
988 1136 return PyInt_FromLong(rev);
989 1137 }
990 1138
991 1139 static int index_contains(indexObject *self, PyObject *value)
992 1140 {
993 1141 char *node;
994 1142 Py_ssize_t nodelen;
995 1143
996 1144 if (PyInt_Check(value)) {
997 1145 long rev = PyInt_AS_LONG(value);
998 1146 return rev >= -1 && rev < index_length(self);
999 1147 }
1000 1148
1001 1149 if (node_check(value, &node, &nodelen) == -1)
1002 1150 return -1;
1003 1151
1004 1152 switch (index_find_node(self, node, nodelen)) {
1005 1153 case -3:
1006 1154 return -1;
1007 1155 case -2:
1008 1156 return 0;
1009 1157 default:
1010 1158 return 1;
1011 1159 }
1012 1160 }
1013 1161
1014 1162 /*
1015 1163 * Invalidate any trie entries introduced by added revs.
1016 1164 */
1017 1165 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
1018 1166 {
1019 1167 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
1020 1168
1021 1169 for (i = start; i < len; i++) {
1022 1170 PyObject *tuple = PyList_GET_ITEM(self->added, i);
1023 1171 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
1024 1172
1025 1173 nt_insert(self, PyString_AS_STRING(node), -1);
1026 1174 }
1027 1175
1028 1176 if (start == 0)
1029 1177 Py_CLEAR(self->added);
1030 1178 }
1031 1179
1032 1180 /*
1033 1181 * Delete a numeric range of revs, which must be at the end of the
1034 1182 * range, but exclude the sentinel nullid entry.
1035 1183 */
1036 1184 static int index_slice_del(indexObject *self, PyObject *item)
1037 1185 {
1038 1186 Py_ssize_t start, stop, step, slicelength;
1039 1187 Py_ssize_t length = index_length(self);
1040 1188 int ret = 0;
1041 1189
1042 1190 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
1043 1191 &start, &stop, &step, &slicelength) < 0)
1044 1192 return -1;
1045 1193
1046 1194 if (slicelength <= 0)
1047 1195 return 0;
1048 1196
1049 1197 if ((step < 0 && start < stop) || (step > 0 && start > stop))
1050 1198 stop = start;
1051 1199
1052 1200 if (step < 0) {
1053 1201 stop = start + 1;
1054 1202 start = stop + step*(slicelength - 1) - 1;
1055 1203 step = -step;
1056 1204 }
1057 1205
1058 1206 if (step != 1) {
1059 1207 PyErr_SetString(PyExc_ValueError,
1060 1208 "revlog index delete requires step size of 1");
1061 1209 return -1;
1062 1210 }
1063 1211
1064 1212 if (stop != length - 1) {
1065 1213 PyErr_SetString(PyExc_IndexError,
1066 1214 "revlog index deletion indices are invalid");
1067 1215 return -1;
1068 1216 }
1069 1217
1070 1218 if (start < self->length - 1) {
1071 1219 if (self->nt) {
1072 1220 Py_ssize_t i;
1073 1221
1074 1222 for (i = start + 1; i < self->length - 1; i++) {
1075 1223 const char *node = index_node(self, i);
1076 1224
1077 1225 if (node)
1078 1226 nt_insert(self, node, -1);
1079 1227 }
1080 1228 if (self->added)
1081 1229 nt_invalidate_added(self, 0);
1082 1230 if (self->ntrev > start)
1083 1231 self->ntrev = (int)start;
1084 1232 }
1085 1233 self->length = start + 1;
1086 1234 if (start < self->raw_length)
1087 1235 self->raw_length = start;
1088 1236 goto done;
1089 1237 }
1090 1238
1091 1239 if (self->nt) {
1092 1240 nt_invalidate_added(self, start - self->length + 1);
1093 1241 if (self->ntrev > start)
1094 1242 self->ntrev = (int)start;
1095 1243 }
1096 1244 if (self->added)
1097 1245 ret = PyList_SetSlice(self->added, start - self->length + 1,
1098 1246 PyList_GET_SIZE(self->added), NULL);
1099 1247 done:
1100 1248 Py_CLEAR(self->headrevs);
1101 1249 return ret;
1102 1250 }
1103 1251
1104 1252 /*
1105 1253 * Supported ops:
1106 1254 *
1107 1255 * slice deletion
1108 1256 * string assignment (extend node->rev mapping)
1109 1257 * string deletion (shrink node->rev mapping)
1110 1258 */
1111 1259 static int index_assign_subscript(indexObject *self, PyObject *item,
1112 1260 PyObject *value)
1113 1261 {
1114 1262 char *node;
1115 1263 Py_ssize_t nodelen;
1116 1264 long rev;
1117 1265
1118 1266 if (PySlice_Check(item) && value == NULL)
1119 1267 return index_slice_del(self, item);
1120 1268
1121 1269 if (node_check(item, &node, &nodelen) == -1)
1122 1270 return -1;
1123 1271
1124 1272 if (value == NULL)
1125 1273 return self->nt ? nt_insert(self, node, -1) : 0;
1126 1274 rev = PyInt_AsLong(value);
1127 1275 if (rev > INT_MAX || rev < 0) {
1128 1276 if (!PyErr_Occurred())
1129 1277 PyErr_SetString(PyExc_ValueError, "rev out of range");
1130 1278 return -1;
1131 1279 }
1132 1280 return nt_insert(self, node, (int)rev);
1133 1281 }
1134 1282
1135 1283 /*
1136 1284 * Find all RevlogNG entries in an index that has inline data. Update
1137 1285 * the optional "offsets" table with those entries.
1138 1286 */
1139 1287 static long inline_scan(indexObject *self, const char **offsets)
1140 1288 {
1141 1289 const char *data = PyString_AS_STRING(self->data);
1142 1290 const char *end = data + PyString_GET_SIZE(self->data);
1143 1291 long incr = v1_hdrsize;
1144 1292 Py_ssize_t len = 0;
1145 1293
1146 1294 while (data + v1_hdrsize <= end) {
1147 1295 uint32_t comp_len;
1148 1296 const char *old_data;
1149 1297 /* 3rd element of header is length of compressed inline data */
1150 1298 comp_len = getbe32(data + 8);
1151 1299 incr = v1_hdrsize + comp_len;
1152 1300 if (incr < v1_hdrsize)
1153 1301 break;
1154 1302 if (offsets)
1155 1303 offsets[len] = data;
1156 1304 len++;
1157 1305 old_data = data;
1158 1306 data += incr;
1159 1307 if (data <= old_data)
1160 1308 break;
1161 1309 }
1162 1310
1163 1311 if (data != end && data + v1_hdrsize != end) {
1164 1312 if (!PyErr_Occurred())
1165 1313 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1166 1314 return -1;
1167 1315 }
1168 1316
1169 1317 return len;
1170 1318 }
1171 1319
1172 1320 static int index_init(indexObject *self, PyObject *args)
1173 1321 {
1174 1322 PyObject *data_obj, *inlined_obj;
1175 1323 Py_ssize_t size;
1176 1324
1177 1325 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
1178 1326 return -1;
1179 1327 if (!PyString_Check(data_obj)) {
1180 1328 PyErr_SetString(PyExc_TypeError, "data is not a string");
1181 1329 return -1;
1182 1330 }
1183 1331 size = PyString_GET_SIZE(data_obj);
1184 1332
1185 1333 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
1186 1334 self->data = data_obj;
1187 1335 self->cache = NULL;
1188 1336
1189 1337 self->added = NULL;
1190 1338 self->headrevs = NULL;
1191 1339 self->offsets = NULL;
1192 1340 self->nt = NULL;
1193 1341 self->ntlength = self->ntcapacity = 0;
1194 1342 self->ntdepth = self->ntsplits = 0;
1195 1343 self->ntlookups = self->ntmisses = 0;
1196 1344 self->ntrev = -1;
1197 1345 Py_INCREF(self->data);
1198 1346
1199 1347 if (self->inlined) {
1200 1348 long len = inline_scan(self, NULL);
1201 1349 if (len == -1)
1202 1350 goto bail;
1203 1351 self->raw_length = len;
1204 1352 self->length = len + 1;
1205 1353 } else {
1206 1354 if (size % v1_hdrsize) {
1207 1355 PyErr_SetString(PyExc_ValueError, "corrupt index file");
1208 1356 goto bail;
1209 1357 }
1210 1358 self->raw_length = size / v1_hdrsize;
1211 1359 self->length = self->raw_length + 1;
1212 1360 }
1213 1361
1214 1362 return 0;
1215 1363 bail:
1216 1364 return -1;
1217 1365 }
1218 1366
1219 1367 static PyObject *index_nodemap(indexObject *self)
1220 1368 {
1221 1369 Py_INCREF(self);
1222 1370 return (PyObject *)self;
1223 1371 }
1224 1372
1225 1373 static void index_dealloc(indexObject *self)
1226 1374 {
1227 1375 _index_clearcaches(self);
1228 1376 Py_DECREF(self->data);
1229 1377 Py_XDECREF(self->added);
1230 1378 PyObject_Del(self);
1231 1379 }
1232 1380
1233 1381 static PySequenceMethods index_sequence_methods = {
1234 1382 (lenfunc)index_length, /* sq_length */
1235 1383 0, /* sq_concat */
1236 1384 0, /* sq_repeat */
1237 1385 (ssizeargfunc)index_get, /* sq_item */
1238 1386 0, /* sq_slice */
1239 1387 0, /* sq_ass_item */
1240 1388 0, /* sq_ass_slice */
1241 1389 (objobjproc)index_contains, /* sq_contains */
1242 1390 };
1243 1391
1244 1392 static PyMappingMethods index_mapping_methods = {
1245 1393 (lenfunc)index_length, /* mp_length */
1246 1394 (binaryfunc)index_getitem, /* mp_subscript */
1247 1395 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
1248 1396 };
1249 1397
1250 1398 static PyMethodDef index_methods[] = {
1251 1399 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
1252 1400 "clear the index caches"},
1253 1401 {"get", (PyCFunction)index_m_get, METH_VARARGS,
1254 1402 "get an index entry"},
1255 1403 {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
1256 1404 "get head revisions"},
1257 1405 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1258 1406 "insert an index entry"},
1259 1407 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1260 1408 "match a potentially ambiguous node ID"},
1261 1409 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1262 1410 "stats for the index"},
1263 1411 {NULL} /* Sentinel */
1264 1412 };
1265 1413
1266 1414 static PyGetSetDef index_getset[] = {
1267 1415 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
1268 1416 {NULL} /* Sentinel */
1269 1417 };
1270 1418
1271 1419 static PyTypeObject indexType = {
1272 1420 PyObject_HEAD_INIT(NULL)
1273 1421 0, /* ob_size */
1274 1422 "parsers.index", /* tp_name */
1275 1423 sizeof(indexObject), /* tp_basicsize */
1276 1424 0, /* tp_itemsize */
1277 1425 (destructor)index_dealloc, /* tp_dealloc */
1278 1426 0, /* tp_print */
1279 1427 0, /* tp_getattr */
1280 1428 0, /* tp_setattr */
1281 1429 0, /* tp_compare */
1282 1430 0, /* tp_repr */
1283 1431 0, /* tp_as_number */
1284 1432 &index_sequence_methods, /* tp_as_sequence */
1285 1433 &index_mapping_methods, /* tp_as_mapping */
1286 1434 0, /* tp_hash */
1287 1435 0, /* tp_call */
1288 1436 0, /* tp_str */
1289 1437 0, /* tp_getattro */
1290 1438 0, /* tp_setattro */
1291 1439 0, /* tp_as_buffer */
1292 1440 Py_TPFLAGS_DEFAULT, /* tp_flags */
1293 1441 "revlog index", /* tp_doc */
1294 1442 0, /* tp_traverse */
1295 1443 0, /* tp_clear */
1296 1444 0, /* tp_richcompare */
1297 1445 0, /* tp_weaklistoffset */
1298 1446 0, /* tp_iter */
1299 1447 0, /* tp_iternext */
1300 1448 index_methods, /* tp_methods */
1301 1449 0, /* tp_members */
1302 1450 index_getset, /* tp_getset */
1303 1451 0, /* tp_base */
1304 1452 0, /* tp_dict */
1305 1453 0, /* tp_descr_get */
1306 1454 0, /* tp_descr_set */
1307 1455 0, /* tp_dictoffset */
1308 1456 (initproc)index_init, /* tp_init */
1309 1457 0, /* tp_alloc */
1310 1458 };
1311 1459
1312 1460 /*
1313 1461 * returns a tuple of the form (index, index, cache) with elements as
1314 1462 * follows:
1315 1463 *
1316 1464 * index: an index object that lazily parses RevlogNG records
1317 1465 * cache: if data is inlined, a tuple (index_file_content, 0), else None
1318 1466 *
1319 1467 * added complications are for backwards compatibility
1320 1468 */
1321 1469 static PyObject *parse_index2(PyObject *self, PyObject *args)
1322 1470 {
1323 1471 PyObject *tuple = NULL, *cache = NULL;
1324 1472 indexObject *idx;
1325 1473 int ret;
1326 1474
1327 1475 idx = PyObject_New(indexObject, &indexType);
1328 1476 if (idx == NULL)
1329 1477 goto bail;
1330 1478
1331 1479 ret = index_init(idx, args);
1332 1480 if (ret == -1)
1333 1481 goto bail;
1334 1482
1335 1483 if (idx->inlined) {
1336 1484 cache = Py_BuildValue("iO", 0, idx->data);
1337 1485 if (cache == NULL)
1338 1486 goto bail;
1339 1487 } else {
1340 1488 cache = Py_None;
1341 1489 Py_INCREF(cache);
1342 1490 }
1343 1491
1344 1492 tuple = Py_BuildValue("NN", idx, cache);
1345 1493 if (!tuple)
1346 1494 goto bail;
1347 1495 return tuple;
1348 1496
1349 1497 bail:
1350 1498 Py_XDECREF(idx);
1351 1499 Py_XDECREF(cache);
1352 1500 Py_XDECREF(tuple);
1353 1501 return NULL;
1354 1502 }
1355 1503
1356 1504 static char parsers_doc[] = "Efficient content parsing.";
1357 1505
1358 1506 static PyMethodDef methods[] = {
1507 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
1359 1508 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
1360 1509 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
1361 1510 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
1362 1511 {NULL, NULL}
1363 1512 };
1364 1513
1365 1514 static void module_init(PyObject *mod)
1366 1515 {
1367 1516 indexType.tp_new = PyType_GenericNew;
1368 1517 if (PyType_Ready(&indexType) < 0)
1369 1518 return;
1370 1519 Py_INCREF(&indexType);
1371 1520
1372 1521 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
1373 1522
1374 1523 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
1375 1524 -1, -1, -1, -1, nullid, 20);
1376 1525 if (nullentry)
1377 1526 PyObject_GC_UnTrack(nullentry);
1527
1528 dirstate_unset = Py_BuildValue("ciii", 'n', 0, -1, -1);
1378 1529 }
1379 1530
1380 1531 #ifdef IS_PY3K
1381 1532 static struct PyModuleDef parsers_module = {
1382 1533 PyModuleDef_HEAD_INIT,
1383 1534 "parsers",
1384 1535 parsers_doc,
1385 1536 -1,
1386 1537 methods
1387 1538 };
1388 1539
1389 1540 PyMODINIT_FUNC PyInit_parsers(void)
1390 1541 {
1391 1542 PyObject *mod = PyModule_Create(&parsers_module);
1392 1543 module_init(mod);
1393 1544 return mod;
1394 1545 }
1395 1546 #else
1396 1547 PyMODINIT_FUNC initparsers(void)
1397 1548 {
1398 1549 PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
1399 1550 module_init(mod);
1400 1551 }
1401 1552 #endif
General Comments 0
You need to be logged in to leave comments. Login now