##// END OF EJS Templates
parsers: make pack_dirstate take now in integer for consistency...
FUJIWARA Katsunori -
r26630:3111b45a default
parent child Browse files
Show More
@@ -1,1053 +1,1053 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from node import nullid
9 9 from i18n import _
10 10 import scmutil, util, osutil, parsers, encoding, pathutil, error
11 11 import os, stat, errno
12 12 import match as matchmod
13 13
14 14 propertycache = util.propertycache
15 15 filecache = scmutil.filecache
16 16 _rangemask = 0x7fffffff
17 17
18 18 dirstatetuple = parsers.dirstatetuple
19 19
20 20 class repocache(filecache):
21 21 """filecache for files in .hg/"""
22 22 def join(self, obj, fname):
23 23 return obj._opener.join(fname)
24 24
25 25 class rootcache(filecache):
26 26 """filecache for files in the repository root"""
27 27 def join(self, obj, fname):
28 28 return obj._join(fname)
29 29
30 30 class dirstate(object):
31 31
32 32 def __init__(self, opener, ui, root, validate):
33 33 '''Create a new dirstate object.
34 34
35 35 opener is an open()-like callable that can be used to open the
36 36 dirstate file; root is the root of the directory tracked by
37 37 the dirstate.
38 38 '''
39 39 self._opener = opener
40 40 self._validate = validate
41 41 self._root = root
42 42 # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
43 43 # UNC path pointing to root share (issue4557)
44 44 self._rootdir = pathutil.normasprefix(root)
45 45 # internal config: ui.forcecwd
46 46 forcecwd = ui.config('ui', 'forcecwd')
47 47 if forcecwd:
48 48 self._cwd = forcecwd
49 49 self._dirty = False
50 50 self._dirtypl = False
51 51 self._lastnormaltime = 0
52 52 self._ui = ui
53 53 self._filecache = {}
54 54 self._parentwriters = 0
55 55 self._filename = 'dirstate'
56 56
57 57 def beginparentchange(self):
58 58 '''Marks the beginning of a set of changes that involve changing
59 59 the dirstate parents. If there is an exception during this time,
60 60 the dirstate will not be written when the wlock is released. This
61 61 prevents writing an incoherent dirstate where the parent doesn't
62 62 match the contents.
63 63 '''
64 64 self._parentwriters += 1
65 65
66 66 def endparentchange(self):
67 67 '''Marks the end of a set of changes that involve changing the
68 68 dirstate parents. Once all parent changes have been marked done,
69 69 the wlock will be free to write the dirstate on release.
70 70 '''
71 71 if self._parentwriters > 0:
72 72 self._parentwriters -= 1
73 73
74 74 def pendingparentchange(self):
75 75 '''Returns true if the dirstate is in the middle of a set of changes
76 76 that modify the dirstate parent.
77 77 '''
78 78 return self._parentwriters > 0
79 79
80 80 @propertycache
81 81 def _map(self):
82 82 '''Return the dirstate contents as a map from filename to
83 83 (state, mode, size, time).'''
84 84 self._read()
85 85 return self._map
86 86
87 87 @propertycache
88 88 def _copymap(self):
89 89 self._read()
90 90 return self._copymap
91 91
92 92 @propertycache
93 93 def _filefoldmap(self):
94 94 try:
95 95 makefilefoldmap = parsers.make_file_foldmap
96 96 except AttributeError:
97 97 pass
98 98 else:
99 99 return makefilefoldmap(self._map, util.normcasespec,
100 100 util.normcasefallback)
101 101
102 102 f = {}
103 103 normcase = util.normcase
104 104 for name, s in self._map.iteritems():
105 105 if s[0] != 'r':
106 106 f[normcase(name)] = name
107 107 f['.'] = '.' # prevents useless util.fspath() invocation
108 108 return f
109 109
110 110 @propertycache
111 111 def _dirfoldmap(self):
112 112 f = {}
113 113 normcase = util.normcase
114 114 for name in self._dirs:
115 115 f[normcase(name)] = name
116 116 return f
117 117
118 118 @repocache('branch')
119 119 def _branch(self):
120 120 try:
121 121 return self._opener.read("branch").strip() or "default"
122 122 except IOError as inst:
123 123 if inst.errno != errno.ENOENT:
124 124 raise
125 125 return "default"
126 126
127 127 @propertycache
128 128 def _pl(self):
129 129 try:
130 130 fp = self._opener(self._filename)
131 131 st = fp.read(40)
132 132 fp.close()
133 133 l = len(st)
134 134 if l == 40:
135 135 return st[:20], st[20:40]
136 136 elif l > 0 and l < 40:
137 137 raise error.Abort(_('working directory state appears damaged!'))
138 138 except IOError as err:
139 139 if err.errno != errno.ENOENT:
140 140 raise
141 141 return [nullid, nullid]
142 142
143 143 @propertycache
144 144 def _dirs(self):
145 145 return util.dirs(self._map, 'r')
146 146
147 147 def dirs(self):
148 148 return self._dirs
149 149
150 150 @rootcache('.hgignore')
151 151 def _ignore(self):
152 152 files = []
153 153 if os.path.exists(self._join('.hgignore')):
154 154 files.append(self._join('.hgignore'))
155 155 for name, path in self._ui.configitems("ui"):
156 156 if name == 'ignore' or name.startswith('ignore.'):
157 157 # we need to use os.path.join here rather than self._join
158 158 # because path is arbitrary and user-specified
159 159 files.append(os.path.join(self._rootdir, util.expandpath(path)))
160 160
161 161 if not files:
162 162 return util.never
163 163
164 164 pats = ['include:%s' % f for f in files]
165 165 return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
166 166
167 167 @propertycache
168 168 def _slash(self):
169 169 return self._ui.configbool('ui', 'slash') and os.sep != '/'
170 170
171 171 @propertycache
172 172 def _checklink(self):
173 173 return util.checklink(self._root)
174 174
175 175 @propertycache
176 176 def _checkexec(self):
177 177 return util.checkexec(self._root)
178 178
179 179 @propertycache
180 180 def _checkcase(self):
181 181 return not util.checkcase(self._join('.hg'))
182 182
183 183 def _join(self, f):
184 184 # much faster than os.path.join()
185 185 # it's safe because f is always a relative path
186 186 return self._rootdir + f
187 187
188 188 def flagfunc(self, buildfallback):
189 189 if self._checklink and self._checkexec:
190 190 def f(x):
191 191 try:
192 192 st = os.lstat(self._join(x))
193 193 if util.statislink(st):
194 194 return 'l'
195 195 if util.statisexec(st):
196 196 return 'x'
197 197 except OSError:
198 198 pass
199 199 return ''
200 200 return f
201 201
202 202 fallback = buildfallback()
203 203 if self._checklink:
204 204 def f(x):
205 205 if os.path.islink(self._join(x)):
206 206 return 'l'
207 207 if 'x' in fallback(x):
208 208 return 'x'
209 209 return ''
210 210 return f
211 211 if self._checkexec:
212 212 def f(x):
213 213 if 'l' in fallback(x):
214 214 return 'l'
215 215 if util.isexec(self._join(x)):
216 216 return 'x'
217 217 return ''
218 218 return f
219 219 else:
220 220 return fallback
221 221
222 222 @propertycache
223 223 def _cwd(self):
224 224 return os.getcwd()
225 225
226 226 def getcwd(self):
227 227 '''Return the path from which a canonical path is calculated.
228 228
229 229 This path should be used to resolve file patterns or to convert
230 230 canonical paths back to file paths for display. It shouldn't be
231 231 used to get real file paths. Use vfs functions instead.
232 232 '''
233 233 cwd = self._cwd
234 234 if cwd == self._root:
235 235 return ''
236 236 # self._root ends with a path separator if self._root is '/' or 'C:\'
237 237 rootsep = self._root
238 238 if not util.endswithsep(rootsep):
239 239 rootsep += os.sep
240 240 if cwd.startswith(rootsep):
241 241 return cwd[len(rootsep):]
242 242 else:
243 243 # we're outside the repo. return an absolute path.
244 244 return cwd
245 245
246 246 def pathto(self, f, cwd=None):
247 247 if cwd is None:
248 248 cwd = self.getcwd()
249 249 path = util.pathto(self._root, cwd, f)
250 250 if self._slash:
251 251 return util.pconvert(path)
252 252 return path
253 253
254 254 def __getitem__(self, key):
255 255 '''Return the current state of key (a filename) in the dirstate.
256 256
257 257 States are:
258 258 n normal
259 259 m needs merging
260 260 r marked for removal
261 261 a marked for addition
262 262 ? not tracked
263 263 '''
264 264 return self._map.get(key, ("?",))[0]
265 265
266 266 def __contains__(self, key):
267 267 return key in self._map
268 268
269 269 def __iter__(self):
270 270 for x in sorted(self._map):
271 271 yield x
272 272
273 273 def iteritems(self):
274 274 return self._map.iteritems()
275 275
276 276 def parents(self):
277 277 return [self._validate(p) for p in self._pl]
278 278
279 279 def p1(self):
280 280 return self._validate(self._pl[0])
281 281
282 282 def p2(self):
283 283 return self._validate(self._pl[1])
284 284
285 285 def branch(self):
286 286 return encoding.tolocal(self._branch)
287 287
288 288 def setparents(self, p1, p2=nullid):
289 289 """Set dirstate parents to p1 and p2.
290 290
291 291 When moving from two parents to one, 'm' merged entries a
292 292 adjusted to normal and previous copy records discarded and
293 293 returned by the call.
294 294
295 295 See localrepo.setparents()
296 296 """
297 297 if self._parentwriters == 0:
298 298 raise ValueError("cannot set dirstate parent without "
299 299 "calling dirstate.beginparentchange")
300 300
301 301 self._dirty = self._dirtypl = True
302 302 oldp2 = self._pl[1]
303 303 self._pl = p1, p2
304 304 copies = {}
305 305 if oldp2 != nullid and p2 == nullid:
306 306 for f, s in self._map.iteritems():
307 307 # Discard 'm' markers when moving away from a merge state
308 308 if s[0] == 'm':
309 309 if f in self._copymap:
310 310 copies[f] = self._copymap[f]
311 311 self.normallookup(f)
312 312 # Also fix up otherparent markers
313 313 elif s[0] == 'n' and s[2] == -2:
314 314 if f in self._copymap:
315 315 copies[f] = self._copymap[f]
316 316 self.add(f)
317 317 return copies
318 318
319 319 def setbranch(self, branch):
320 320 self._branch = encoding.fromlocal(branch)
321 321 f = self._opener('branch', 'w', atomictemp=True)
322 322 try:
323 323 f.write(self._branch + '\n')
324 324 f.close()
325 325
326 326 # make sure filecache has the correct stat info for _branch after
327 327 # replacing the underlying file
328 328 ce = self._filecache['_branch']
329 329 if ce:
330 330 ce.refresh()
331 331 except: # re-raises
332 332 f.discard()
333 333 raise
334 334
335 335 def _read(self):
336 336 self._map = {}
337 337 self._copymap = {}
338 338 try:
339 339 fp = self._opener.open(self._filename)
340 340 try:
341 341 st = fp.read()
342 342 finally:
343 343 fp.close()
344 344 except IOError as err:
345 345 if err.errno != errno.ENOENT:
346 346 raise
347 347 return
348 348 if not st:
349 349 return
350 350
351 351 if util.safehasattr(parsers, 'dict_new_presized'):
352 352 # Make an estimate of the number of files in the dirstate based on
353 353 # its size. From a linear regression on a set of real-world repos,
354 354 # all over 10,000 files, the size of a dirstate entry is 85
355 355 # bytes. The cost of resizing is significantly higher than the cost
356 356 # of filling in a larger presized dict, so subtract 20% from the
357 357 # size.
358 358 #
359 359 # This heuristic is imperfect in many ways, so in a future dirstate
360 360 # format update it makes sense to just record the number of entries
361 361 # on write.
362 362 self._map = parsers.dict_new_presized(len(st) / 71)
363 363
364 364 # Python's garbage collector triggers a GC each time a certain number
365 365 # of container objects (the number being defined by
366 366 # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
367 367 # for each file in the dirstate. The C version then immediately marks
368 368 # them as not to be tracked by the collector. However, this has no
369 369 # effect on when GCs are triggered, only on what objects the GC looks
370 370 # into. This means that O(number of files) GCs are unavoidable.
371 371 # Depending on when in the process's lifetime the dirstate is parsed,
372 372 # this can get very expensive. As a workaround, disable GC while
373 373 # parsing the dirstate.
374 374 #
375 375 # (we cannot decorate the function directly since it is in a C module)
376 376 parse_dirstate = util.nogc(parsers.parse_dirstate)
377 377 p = parse_dirstate(self._map, self._copymap, st)
378 378 if not self._dirtypl:
379 379 self._pl = p
380 380
381 381 def invalidate(self):
382 382 for a in ("_map", "_copymap", "_filefoldmap", "_dirfoldmap", "_branch",
383 383 "_pl", "_dirs", "_ignore"):
384 384 if a in self.__dict__:
385 385 delattr(self, a)
386 386 self._lastnormaltime = 0
387 387 self._dirty = False
388 388 self._parentwriters = 0
389 389
390 390 def copy(self, source, dest):
391 391 """Mark dest as a copy of source. Unmark dest if source is None."""
392 392 if source == dest:
393 393 return
394 394 self._dirty = True
395 395 if source is not None:
396 396 self._copymap[dest] = source
397 397 elif dest in self._copymap:
398 398 del self._copymap[dest]
399 399
400 400 def copied(self, file):
401 401 return self._copymap.get(file, None)
402 402
403 403 def copies(self):
404 404 return self._copymap
405 405
406 406 def _droppath(self, f):
407 407 if self[f] not in "?r" and "_dirs" in self.__dict__:
408 408 self._dirs.delpath(f)
409 409
410 410 def _addpath(self, f, state, mode, size, mtime):
411 411 oldstate = self[f]
412 412 if state == 'a' or oldstate == 'r':
413 413 scmutil.checkfilename(f)
414 414 if f in self._dirs:
415 415 raise error.Abort(_('directory %r already in dirstate') % f)
416 416 # shadows
417 417 for d in util.finddirs(f):
418 418 if d in self._dirs:
419 419 break
420 420 if d in self._map and self[d] != 'r':
421 421 raise error.Abort(
422 422 _('file %r in dirstate clashes with %r') % (d, f))
423 423 if oldstate in "?r" and "_dirs" in self.__dict__:
424 424 self._dirs.addpath(f)
425 425 self._dirty = True
426 426 self._map[f] = dirstatetuple(state, mode, size, mtime)
427 427
428 428 def normal(self, f):
429 429 '''Mark a file normal and clean.'''
430 430 s = os.lstat(self._join(f))
431 431 mtime = util.statmtimesec(s)
432 432 self._addpath(f, 'n', s.st_mode,
433 433 s.st_size & _rangemask, mtime & _rangemask)
434 434 if f in self._copymap:
435 435 del self._copymap[f]
436 436 if mtime > self._lastnormaltime:
437 437 # Remember the most recent modification timeslot for status(),
438 438 # to make sure we won't miss future size-preserving file content
439 439 # modifications that happen within the same timeslot.
440 440 self._lastnormaltime = mtime
441 441
442 442 def normallookup(self, f):
443 443 '''Mark a file normal, but possibly dirty.'''
444 444 if self._pl[1] != nullid and f in self._map:
445 445 # if there is a merge going on and the file was either
446 446 # in state 'm' (-1) or coming from other parent (-2) before
447 447 # being removed, restore that state.
448 448 entry = self._map[f]
449 449 if entry[0] == 'r' and entry[2] in (-1, -2):
450 450 source = self._copymap.get(f)
451 451 if entry[2] == -1:
452 452 self.merge(f)
453 453 elif entry[2] == -2:
454 454 self.otherparent(f)
455 455 if source:
456 456 self.copy(source, f)
457 457 return
458 458 if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
459 459 return
460 460 self._addpath(f, 'n', 0, -1, -1)
461 461 if f in self._copymap:
462 462 del self._copymap[f]
463 463
464 464 def otherparent(self, f):
465 465 '''Mark as coming from the other parent, always dirty.'''
466 466 if self._pl[1] == nullid:
467 467 raise error.Abort(_("setting %r to other parent "
468 468 "only allowed in merges") % f)
469 469 if f in self and self[f] == 'n':
470 470 # merge-like
471 471 self._addpath(f, 'm', 0, -2, -1)
472 472 else:
473 473 # add-like
474 474 self._addpath(f, 'n', 0, -2, -1)
475 475
476 476 if f in self._copymap:
477 477 del self._copymap[f]
478 478
479 479 def add(self, f):
480 480 '''Mark a file added.'''
481 481 self._addpath(f, 'a', 0, -1, -1)
482 482 if f in self._copymap:
483 483 del self._copymap[f]
484 484
485 485 def remove(self, f):
486 486 '''Mark a file removed.'''
487 487 self._dirty = True
488 488 self._droppath(f)
489 489 size = 0
490 490 if self._pl[1] != nullid and f in self._map:
491 491 # backup the previous state
492 492 entry = self._map[f]
493 493 if entry[0] == 'm': # merge
494 494 size = -1
495 495 elif entry[0] == 'n' and entry[2] == -2: # other parent
496 496 size = -2
497 497 self._map[f] = dirstatetuple('r', 0, size, 0)
498 498 if size == 0 and f in self._copymap:
499 499 del self._copymap[f]
500 500
501 501 def merge(self, f):
502 502 '''Mark a file merged.'''
503 503 if self._pl[1] == nullid:
504 504 return self.normallookup(f)
505 505 return self.otherparent(f)
506 506
507 507 def drop(self, f):
508 508 '''Drop a file from the dirstate'''
509 509 if f in self._map:
510 510 self._dirty = True
511 511 self._droppath(f)
512 512 del self._map[f]
513 513
514 514 def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
515 515 if exists is None:
516 516 exists = os.path.lexists(os.path.join(self._root, path))
517 517 if not exists:
518 518 # Maybe a path component exists
519 519 if not ignoremissing and '/' in path:
520 520 d, f = path.rsplit('/', 1)
521 521 d = self._normalize(d, False, ignoremissing, None)
522 522 folded = d + "/" + f
523 523 else:
524 524 # No path components, preserve original case
525 525 folded = path
526 526 else:
527 527 # recursively normalize leading directory components
528 528 # against dirstate
529 529 if '/' in normed:
530 530 d, f = normed.rsplit('/', 1)
531 531 d = self._normalize(d, False, ignoremissing, True)
532 532 r = self._root + "/" + d
533 533 folded = d + "/" + util.fspath(f, r)
534 534 else:
535 535 folded = util.fspath(normed, self._root)
536 536 storemap[normed] = folded
537 537
538 538 return folded
539 539
540 540 def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
541 541 normed = util.normcase(path)
542 542 folded = self._filefoldmap.get(normed, None)
543 543 if folded is None:
544 544 if isknown:
545 545 folded = path
546 546 else:
547 547 folded = self._discoverpath(path, normed, ignoremissing, exists,
548 548 self._filefoldmap)
549 549 return folded
550 550
551 551 def _normalize(self, path, isknown, ignoremissing=False, exists=None):
552 552 normed = util.normcase(path)
553 553 folded = self._filefoldmap.get(normed, None)
554 554 if folded is None:
555 555 folded = self._dirfoldmap.get(normed, None)
556 556 if folded is None:
557 557 if isknown:
558 558 folded = path
559 559 else:
560 560 # store discovered result in dirfoldmap so that future
561 561 # normalizefile calls don't start matching directories
562 562 folded = self._discoverpath(path, normed, ignoremissing, exists,
563 563 self._dirfoldmap)
564 564 return folded
565 565
566 566 def normalize(self, path, isknown=False, ignoremissing=False):
567 567 '''
568 568 normalize the case of a pathname when on a casefolding filesystem
569 569
570 570 isknown specifies whether the filename came from walking the
571 571 disk, to avoid extra filesystem access.
572 572
573 573 If ignoremissing is True, missing path are returned
574 574 unchanged. Otherwise, we try harder to normalize possibly
575 575 existing path components.
576 576
577 577 The normalized case is determined based on the following precedence:
578 578
579 579 - version of name already stored in the dirstate
580 580 - version of name stored on disk
581 581 - version provided via command arguments
582 582 '''
583 583
584 584 if self._checkcase:
585 585 return self._normalize(path, isknown, ignoremissing)
586 586 return path
587 587
588 588 def clear(self):
589 589 self._map = {}
590 590 if "_dirs" in self.__dict__:
591 591 delattr(self, "_dirs")
592 592 self._copymap = {}
593 593 self._pl = [nullid, nullid]
594 594 self._lastnormaltime = 0
595 595 self._dirty = True
596 596
597 597 def rebuild(self, parent, allfiles, changedfiles=None):
598 598 if changedfiles is None:
599 599 changedfiles = allfiles
600 600 oldmap = self._map
601 601 self.clear()
602 602 for f in allfiles:
603 603 if f not in changedfiles:
604 604 self._map[f] = oldmap[f]
605 605 else:
606 606 if 'x' in allfiles.flags(f):
607 607 self._map[f] = dirstatetuple('n', 0o777, -1, 0)
608 608 else:
609 609 self._map[f] = dirstatetuple('n', 0o666, -1, 0)
610 610 self._pl = (parent, nullid)
611 611 self._dirty = True
612 612
613 613 def write(self):
614 614 if not self._dirty:
615 615 return
616 616
617 617 # enough 'delaywrite' prevents 'pack_dirstate' from dropping
618 618 # timestamp of each entries in dirstate, because of 'now > mtime'
619 619 delaywrite = self._ui.configint('debug', 'dirstate.delaywrite', 0)
620 620 if delaywrite > 0:
621 621 import time # to avoid useless import
622 622 time.sleep(delaywrite)
623 623
624 624 st = self._opener(self._filename, "w", atomictemp=True)
625 625 self._writedirstate(st)
626 626
627 627 def _writedirstate(self, st):
628 628 # use the modification time of the newly created temporary file as the
629 629 # filesystem's notion of 'now'
630 now = util.fstat(st).st_mtime
630 now = util.statmtimesec(util.fstat(st)) & _rangemask
631 631 st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
632 632 st.close()
633 633 self._lastnormaltime = 0
634 634 self._dirty = self._dirtypl = False
635 635
636 636 def _dirignore(self, f):
637 637 if f == '.':
638 638 return False
639 639 if self._ignore(f):
640 640 return True
641 641 for p in util.finddirs(f):
642 642 if self._ignore(p):
643 643 return True
644 644 return False
645 645
646 646 def _walkexplicit(self, match, subrepos):
647 647 '''Get stat data about the files explicitly specified by match.
648 648
649 649 Return a triple (results, dirsfound, dirsnotfound).
650 650 - results is a mapping from filename to stat result. It also contains
651 651 listings mapping subrepos and .hg to None.
652 652 - dirsfound is a list of files found to be directories.
653 653 - dirsnotfound is a list of files that the dirstate thinks are
654 654 directories and that were not found.'''
655 655
656 656 def badtype(mode):
657 657 kind = _('unknown')
658 658 if stat.S_ISCHR(mode):
659 659 kind = _('character device')
660 660 elif stat.S_ISBLK(mode):
661 661 kind = _('block device')
662 662 elif stat.S_ISFIFO(mode):
663 663 kind = _('fifo')
664 664 elif stat.S_ISSOCK(mode):
665 665 kind = _('socket')
666 666 elif stat.S_ISDIR(mode):
667 667 kind = _('directory')
668 668 return _('unsupported file type (type is %s)') % kind
669 669
670 670 matchedir = match.explicitdir
671 671 badfn = match.bad
672 672 dmap = self._map
673 673 lstat = os.lstat
674 674 getkind = stat.S_IFMT
675 675 dirkind = stat.S_IFDIR
676 676 regkind = stat.S_IFREG
677 677 lnkkind = stat.S_IFLNK
678 678 join = self._join
679 679 dirsfound = []
680 680 foundadd = dirsfound.append
681 681 dirsnotfound = []
682 682 notfoundadd = dirsnotfound.append
683 683
684 684 if not match.isexact() and self._checkcase:
685 685 normalize = self._normalize
686 686 else:
687 687 normalize = None
688 688
689 689 files = sorted(match.files())
690 690 subrepos.sort()
691 691 i, j = 0, 0
692 692 while i < len(files) and j < len(subrepos):
693 693 subpath = subrepos[j] + "/"
694 694 if files[i] < subpath:
695 695 i += 1
696 696 continue
697 697 while i < len(files) and files[i].startswith(subpath):
698 698 del files[i]
699 699 j += 1
700 700
701 701 if not files or '.' in files:
702 702 files = ['.']
703 703 results = dict.fromkeys(subrepos)
704 704 results['.hg'] = None
705 705
706 706 alldirs = None
707 707 for ff in files:
708 708 # constructing the foldmap is expensive, so don't do it for the
709 709 # common case where files is ['.']
710 710 if normalize and ff != '.':
711 711 nf = normalize(ff, False, True)
712 712 else:
713 713 nf = ff
714 714 if nf in results:
715 715 continue
716 716
717 717 try:
718 718 st = lstat(join(nf))
719 719 kind = getkind(st.st_mode)
720 720 if kind == dirkind:
721 721 if nf in dmap:
722 722 # file replaced by dir on disk but still in dirstate
723 723 results[nf] = None
724 724 if matchedir:
725 725 matchedir(nf)
726 726 foundadd((nf, ff))
727 727 elif kind == regkind or kind == lnkkind:
728 728 results[nf] = st
729 729 else:
730 730 badfn(ff, badtype(kind))
731 731 if nf in dmap:
732 732 results[nf] = None
733 733 except OSError as inst: # nf not found on disk - it is dirstate only
734 734 if nf in dmap: # does it exactly match a missing file?
735 735 results[nf] = None
736 736 else: # does it match a missing directory?
737 737 if alldirs is None:
738 738 alldirs = util.dirs(dmap)
739 739 if nf in alldirs:
740 740 if matchedir:
741 741 matchedir(nf)
742 742 notfoundadd(nf)
743 743 else:
744 744 badfn(ff, inst.strerror)
745 745
746 746 # Case insensitive filesystems cannot rely on lstat() failing to detect
747 747 # a case-only rename. Prune the stat object for any file that does not
748 748 # match the case in the filesystem, if there are multiple files that
749 749 # normalize to the same path.
750 750 if match.isexact() and self._checkcase:
751 751 normed = {}
752 752
753 753 for f, st in results.iteritems():
754 754 if st is None:
755 755 continue
756 756
757 757 nc = util.normcase(f)
758 758 paths = normed.get(nc)
759 759
760 760 if paths is None:
761 761 paths = set()
762 762 normed[nc] = paths
763 763
764 764 paths.add(f)
765 765
766 766 for norm, paths in normed.iteritems():
767 767 if len(paths) > 1:
768 768 for path in paths:
769 769 folded = self._discoverpath(path, norm, True, None,
770 770 self._dirfoldmap)
771 771 if path != folded:
772 772 results[path] = None
773 773
774 774 return results, dirsfound, dirsnotfound
775 775
776 776 def walk(self, match, subrepos, unknown, ignored, full=True):
777 777 '''
778 778 Walk recursively through the directory tree, finding all files
779 779 matched by match.
780 780
781 781 If full is False, maybe skip some known-clean files.
782 782
783 783 Return a dict mapping filename to stat-like object (either
784 784 mercurial.osutil.stat instance or return value of os.stat()).
785 785
786 786 '''
787 787 # full is a flag that extensions that hook into walk can use -- this
788 788 # implementation doesn't use it at all. This satisfies the contract
789 789 # because we only guarantee a "maybe".
790 790
791 791 if ignored:
792 792 ignore = util.never
793 793 dirignore = util.never
794 794 elif unknown:
795 795 ignore = self._ignore
796 796 dirignore = self._dirignore
797 797 else:
798 798 # if not unknown and not ignored, drop dir recursion and step 2
799 799 ignore = util.always
800 800 dirignore = util.always
801 801
802 802 matchfn = match.matchfn
803 803 matchalways = match.always()
804 804 matchtdir = match.traversedir
805 805 dmap = self._map
806 806 listdir = osutil.listdir
807 807 lstat = os.lstat
808 808 dirkind = stat.S_IFDIR
809 809 regkind = stat.S_IFREG
810 810 lnkkind = stat.S_IFLNK
811 811 join = self._join
812 812
813 813 exact = skipstep3 = False
814 814 if match.isexact(): # match.exact
815 815 exact = True
816 816 dirignore = util.always # skip step 2
817 817 elif match.prefix(): # match.match, no patterns
818 818 skipstep3 = True
819 819
820 820 if not exact and self._checkcase:
821 821 normalize = self._normalize
822 822 normalizefile = self._normalizefile
823 823 skipstep3 = False
824 824 else:
825 825 normalize = self._normalize
826 826 normalizefile = None
827 827
828 828 # step 1: find all explicit files
829 829 results, work, dirsnotfound = self._walkexplicit(match, subrepos)
830 830
831 831 skipstep3 = skipstep3 and not (work or dirsnotfound)
832 832 work = [d for d in work if not dirignore(d[0])]
833 833
834 834 # step 2: visit subdirectories
835 835 def traverse(work, alreadynormed):
836 836 wadd = work.append
837 837 while work:
838 838 nd = work.pop()
839 839 skip = None
840 840 if nd == '.':
841 841 nd = ''
842 842 else:
843 843 skip = '.hg'
844 844 try:
845 845 entries = listdir(join(nd), stat=True, skip=skip)
846 846 except OSError as inst:
847 847 if inst.errno in (errno.EACCES, errno.ENOENT):
848 848 match.bad(self.pathto(nd), inst.strerror)
849 849 continue
850 850 raise
851 851 for f, kind, st in entries:
852 852 if normalizefile:
853 853 # even though f might be a directory, we're only
854 854 # interested in comparing it to files currently in the
855 855 # dmap -- therefore normalizefile is enough
856 856 nf = normalizefile(nd and (nd + "/" + f) or f, True,
857 857 True)
858 858 else:
859 859 nf = nd and (nd + "/" + f) or f
860 860 if nf not in results:
861 861 if kind == dirkind:
862 862 if not ignore(nf):
863 863 if matchtdir:
864 864 matchtdir(nf)
865 865 wadd(nf)
866 866 if nf in dmap and (matchalways or matchfn(nf)):
867 867 results[nf] = None
868 868 elif kind == regkind or kind == lnkkind:
869 869 if nf in dmap:
870 870 if matchalways or matchfn(nf):
871 871 results[nf] = st
872 872 elif ((matchalways or matchfn(nf))
873 873 and not ignore(nf)):
874 874 # unknown file -- normalize if necessary
875 875 if not alreadynormed:
876 876 nf = normalize(nf, False, True)
877 877 results[nf] = st
878 878 elif nf in dmap and (matchalways or matchfn(nf)):
879 879 results[nf] = None
880 880
881 881 for nd, d in work:
882 882 # alreadynormed means that processwork doesn't have to do any
883 883 # expensive directory normalization
884 884 alreadynormed = not normalize or nd == d
885 885 traverse([d], alreadynormed)
886 886
887 887 for s in subrepos:
888 888 del results[s]
889 889 del results['.hg']
890 890
891 891 # step 3: visit remaining files from dmap
892 892 if not skipstep3 and not exact:
893 893 # If a dmap file is not in results yet, it was either
894 894 # a) not matching matchfn b) ignored, c) missing, or d) under a
895 895 # symlink directory.
896 896 if not results and matchalways:
897 897 visit = dmap.keys()
898 898 else:
899 899 visit = [f for f in dmap if f not in results and matchfn(f)]
900 900 visit.sort()
901 901
902 902 if unknown:
903 903 # unknown == True means we walked all dirs under the roots
904 904 # that wasn't ignored, and everything that matched was stat'ed
905 905 # and is already in results.
906 906 # The rest must thus be ignored or under a symlink.
907 907 audit_path = pathutil.pathauditor(self._root)
908 908
909 909 for nf in iter(visit):
910 910 # If a stat for the same file was already added with a
911 911 # different case, don't add one for this, since that would
912 912 # make it appear as if the file exists under both names
913 913 # on disk.
914 914 if (normalizefile and
915 915 normalizefile(nf, True, True) in results):
916 916 results[nf] = None
917 917 # Report ignored items in the dmap as long as they are not
918 918 # under a symlink directory.
919 919 elif audit_path.check(nf):
920 920 try:
921 921 results[nf] = lstat(join(nf))
922 922 # file was just ignored, no links, and exists
923 923 except OSError:
924 924 # file doesn't exist
925 925 results[nf] = None
926 926 else:
927 927 # It's either missing or under a symlink directory
928 928 # which we in this case report as missing
929 929 results[nf] = None
930 930 else:
931 931 # We may not have walked the full directory tree above,
932 932 # so stat and check everything we missed.
933 933 nf = iter(visit).next
934 934 pos = 0
935 935 while pos < len(visit):
936 936 # visit in mid-sized batches so that we don't
937 937 # block signals indefinitely
938 938 xr = xrange(pos, min(len(visit), pos + 1000))
939 939 for st in util.statfiles([join(visit[n]) for n in xr]):
940 940 results[nf()] = st
941 941 pos += 1000
942 942 return results
943 943
944 944 def status(self, match, subrepos, ignored, clean, unknown):
945 945 '''Determine the status of the working copy relative to the
946 946 dirstate and return a pair of (unsure, status), where status is of type
947 947 scmutil.status and:
948 948
949 949 unsure:
950 950 files that might have been modified since the dirstate was
951 951 written, but need to be read to be sure (size is the same
952 952 but mtime differs)
953 953 status.modified:
954 954 files that have definitely been modified since the dirstate
955 955 was written (different size or mode)
956 956 status.clean:
957 957 files that have definitely not been modified since the
958 958 dirstate was written
959 959 '''
960 960 listignored, listclean, listunknown = ignored, clean, unknown
961 961 lookup, modified, added, unknown, ignored = [], [], [], [], []
962 962 removed, deleted, clean = [], [], []
963 963
964 964 dmap = self._map
965 965 ladd = lookup.append # aka "unsure"
966 966 madd = modified.append
967 967 aadd = added.append
968 968 uadd = unknown.append
969 969 iadd = ignored.append
970 970 radd = removed.append
971 971 dadd = deleted.append
972 972 cadd = clean.append
973 973 mexact = match.exact
974 974 dirignore = self._dirignore
975 975 checkexec = self._checkexec
976 976 copymap = self._copymap
977 977 lastnormaltime = self._lastnormaltime
978 978
979 979 # We need to do full walks when either
980 980 # - we're listing all clean files, or
981 981 # - match.traversedir does something, because match.traversedir should
982 982 # be called for every dir in the working dir
983 983 full = listclean or match.traversedir is not None
984 984 for fn, st in self.walk(match, subrepos, listunknown, listignored,
985 985 full=full).iteritems():
986 986 if fn not in dmap:
987 987 if (listignored or mexact(fn)) and dirignore(fn):
988 988 if listignored:
989 989 iadd(fn)
990 990 else:
991 991 uadd(fn)
992 992 continue
993 993
994 994 # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
995 995 # written like that for performance reasons. dmap[fn] is not a
996 996 # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
997 997 # opcode has fast paths when the value to be unpacked is a tuple or
998 998 # a list, but falls back to creating a full-fledged iterator in
999 999 # general. That is much slower than simply accessing and storing the
1000 1000 # tuple members one by one.
1001 1001 t = dmap[fn]
1002 1002 state = t[0]
1003 1003 mode = t[1]
1004 1004 size = t[2]
1005 1005 time = t[3]
1006 1006
1007 1007 if not st and state in "nma":
1008 1008 dadd(fn)
1009 1009 elif state == 'n':
1010 1010 mtime = util.statmtimesec(st)
1011 1011 if (size >= 0 and
1012 1012 ((size != st.st_size and size != st.st_size & _rangemask)
1013 1013 or ((mode ^ st.st_mode) & 0o100 and checkexec))
1014 1014 or size == -2 # other parent
1015 1015 or fn in copymap):
1016 1016 madd(fn)
1017 1017 elif time != mtime and time != mtime & _rangemask:
1018 1018 ladd(fn)
1019 1019 elif mtime == lastnormaltime:
1020 1020 # fn may have just been marked as normal and it may have
1021 1021 # changed in the same second without changing its size.
1022 1022 # This can happen if we quickly do multiple commits.
1023 1023 # Force lookup, so we don't miss such a racy file change.
1024 1024 ladd(fn)
1025 1025 elif listclean:
1026 1026 cadd(fn)
1027 1027 elif state == 'm':
1028 1028 madd(fn)
1029 1029 elif state == 'a':
1030 1030 aadd(fn)
1031 1031 elif state == 'r':
1032 1032 radd(fn)
1033 1033
1034 1034 return (lookup, scmutil.status(modified, added, removed, deleted,
1035 1035 unknown, ignored, clean))
1036 1036
1037 1037 def matches(self, match):
1038 1038 '''
1039 1039 return files in the dirstate (in whatever state) filtered by match
1040 1040 '''
1041 1041 dmap = self._map
1042 1042 if match.always():
1043 1043 return dmap.keys()
1044 1044 files = match.files()
1045 1045 if match.isexact():
1046 1046 # fast path -- filter the other way around, since typically files is
1047 1047 # much smaller than dmap
1048 1048 return [f for f in files if f in dmap]
1049 1049 if match.prefix() and all(fn in dmap for fn in files):
1050 1050 # fast path -- all the values are known to be files, so just return
1051 1051 # that
1052 1052 return list(files)
1053 1053 return [f for f in dmap if match(f)]
@@ -1,2843 +1,2843 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16
17 17 static char *versionerrortext = "Python minor version mismatch";
18 18
19 19 static int8_t hextable[256] = {
20 20 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21 21 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
22 22 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
23 23 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
24 24 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
25 25 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
26 26 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
27 27 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28 28 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29 29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30 30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
31 31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32 32 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33 33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
34 34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
35 35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
36 36 };
37 37
38 38 static char lowertable[128] = {
39 39 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
40 40 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
41 41 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
42 42 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
43 43 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
44 44 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
45 45 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
46 46 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
47 47 '\x40',
48 48 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
49 49 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
50 50 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
51 51 '\x78', '\x79', '\x7a', /* X-Z */
52 52 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
53 53 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
54 54 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
55 55 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
56 56 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
57 57 };
58 58
59 59 static char uppertable[128] = {
60 60 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
61 61 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
62 62 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
63 63 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
64 64 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
65 65 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
66 66 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
67 67 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
68 68 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
69 69 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
70 70 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
71 71 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
72 72 '\x60',
73 73 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
74 74 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
75 75 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
76 76 '\x58', '\x59', '\x5a', /* x-z */
77 77 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
78 78 };
79 79
80 80 static inline int hexdigit(const char *p, Py_ssize_t off)
81 81 {
82 82 int8_t val = hextable[(unsigned char)p[off]];
83 83
84 84 if (val >= 0) {
85 85 return val;
86 86 }
87 87
88 88 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
89 89 return 0;
90 90 }
91 91
92 92 /*
93 93 * Turn a hex-encoded string into binary.
94 94 */
95 95 PyObject *unhexlify(const char *str, int len)
96 96 {
97 97 PyObject *ret;
98 98 char *d;
99 99 int i;
100 100
101 101 ret = PyBytes_FromStringAndSize(NULL, len / 2);
102 102
103 103 if (!ret)
104 104 return NULL;
105 105
106 106 d = PyBytes_AsString(ret);
107 107
108 108 for (i = 0; i < len;) {
109 109 int hi = hexdigit(str, i++);
110 110 int lo = hexdigit(str, i++);
111 111 *d++ = (hi << 4) | lo;
112 112 }
113 113
114 114 return ret;
115 115 }
116 116
117 117 static inline PyObject *_asciitransform(PyObject *str_obj,
118 118 const char table[128],
119 119 PyObject *fallback_fn)
120 120 {
121 121 char *str, *newstr;
122 122 Py_ssize_t i, len;
123 123 PyObject *newobj = NULL;
124 124 PyObject *ret = NULL;
125 125
126 126 str = PyBytes_AS_STRING(str_obj);
127 127 len = PyBytes_GET_SIZE(str_obj);
128 128
129 129 newobj = PyBytes_FromStringAndSize(NULL, len);
130 130 if (!newobj)
131 131 goto quit;
132 132
133 133 newstr = PyBytes_AS_STRING(newobj);
134 134
135 135 for (i = 0; i < len; i++) {
136 136 char c = str[i];
137 137 if (c & 0x80) {
138 138 if (fallback_fn != NULL) {
139 139 ret = PyObject_CallFunctionObjArgs(fallback_fn,
140 140 str_obj, NULL);
141 141 } else {
142 142 PyObject *err = PyUnicodeDecodeError_Create(
143 143 "ascii", str, len, i, (i + 1),
144 144 "unexpected code byte");
145 145 PyErr_SetObject(PyExc_UnicodeDecodeError, err);
146 146 Py_XDECREF(err);
147 147 }
148 148 goto quit;
149 149 }
150 150 newstr[i] = table[(unsigned char)c];
151 151 }
152 152
153 153 ret = newobj;
154 154 Py_INCREF(ret);
155 155 quit:
156 156 Py_XDECREF(newobj);
157 157 return ret;
158 158 }
159 159
160 160 static PyObject *asciilower(PyObject *self, PyObject *args)
161 161 {
162 162 PyObject *str_obj;
163 163 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
164 164 return NULL;
165 165 return _asciitransform(str_obj, lowertable, NULL);
166 166 }
167 167
168 168 static PyObject *asciiupper(PyObject *self, PyObject *args)
169 169 {
170 170 PyObject *str_obj;
171 171 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
172 172 return NULL;
173 173 return _asciitransform(str_obj, uppertable, NULL);
174 174 }
175 175
176 176 static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
177 177 {
178 178 /* _PyDict_NewPresized expects a minused parameter, but it actually
179 179 creates a dictionary that's the nearest power of two bigger than the
180 180 parameter. For example, with the initial minused = 1000, the
181 181 dictionary created has size 1024. Of course in a lot of cases that
182 182 can be greater than the maximum load factor Python's dict object
183 183 expects (= 2/3), so as soon as we cross the threshold we'll resize
184 184 anyway. So create a dictionary that's at least 3/2 the size. */
185 185 return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
186 186 }
187 187
188 188 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
189 189 {
190 190 Py_ssize_t expected_size;
191 191
192 192 if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
193 193 return NULL;
194 194
195 195 return _dict_new_presized(expected_size);
196 196 }
197 197
198 198 static PyObject *make_file_foldmap(PyObject *self, PyObject *args)
199 199 {
200 200 PyObject *dmap, *spec_obj, *normcase_fallback;
201 201 PyObject *file_foldmap = NULL;
202 202 enum normcase_spec spec;
203 203 PyObject *k, *v;
204 204 dirstateTupleObject *tuple;
205 205 Py_ssize_t pos = 0;
206 206 const char *table;
207 207
208 208 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
209 209 &PyDict_Type, &dmap,
210 210 &PyInt_Type, &spec_obj,
211 211 &PyFunction_Type, &normcase_fallback))
212 212 goto quit;
213 213
214 214 spec = (int)PyInt_AS_LONG(spec_obj);
215 215 switch (spec) {
216 216 case NORMCASE_LOWER:
217 217 table = lowertable;
218 218 break;
219 219 case NORMCASE_UPPER:
220 220 table = uppertable;
221 221 break;
222 222 case NORMCASE_OTHER:
223 223 table = NULL;
224 224 break;
225 225 default:
226 226 PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
227 227 goto quit;
228 228 }
229 229
230 230 /* Add some more entries to deal with additions outside this
231 231 function. */
232 232 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
233 233 if (file_foldmap == NULL)
234 234 goto quit;
235 235
236 236 while (PyDict_Next(dmap, &pos, &k, &v)) {
237 237 if (!dirstate_tuple_check(v)) {
238 238 PyErr_SetString(PyExc_TypeError,
239 239 "expected a dirstate tuple");
240 240 goto quit;
241 241 }
242 242
243 243 tuple = (dirstateTupleObject *)v;
244 244 if (tuple->state != 'r') {
245 245 PyObject *normed;
246 246 if (table != NULL) {
247 247 normed = _asciitransform(k, table,
248 248 normcase_fallback);
249 249 } else {
250 250 normed = PyObject_CallFunctionObjArgs(
251 251 normcase_fallback, k, NULL);
252 252 }
253 253
254 254 if (normed == NULL)
255 255 goto quit;
256 256 if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
257 257 Py_DECREF(normed);
258 258 goto quit;
259 259 }
260 260 Py_DECREF(normed);
261 261 }
262 262 }
263 263 return file_foldmap;
264 264 quit:
265 265 Py_XDECREF(file_foldmap);
266 266 return NULL;
267 267 }
268 268
269 269 /*
270 270 * This code assumes that a manifest is stitched together with newline
271 271 * ('\n') characters.
272 272 */
273 273 static PyObject *parse_manifest(PyObject *self, PyObject *args)
274 274 {
275 275 PyObject *mfdict, *fdict;
276 276 char *str, *start, *end;
277 277 int len;
278 278
279 279 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
280 280 &PyDict_Type, &mfdict,
281 281 &PyDict_Type, &fdict,
282 282 &str, &len))
283 283 goto quit;
284 284
285 285 start = str;
286 286 end = str + len;
287 287 while (start < end) {
288 288 PyObject *file = NULL, *node = NULL;
289 289 PyObject *flags = NULL;
290 290 char *zero = NULL, *newline = NULL;
291 291 ptrdiff_t nlen;
292 292
293 293 zero = memchr(start, '\0', end - start);
294 294 if (!zero) {
295 295 PyErr_SetString(PyExc_ValueError,
296 296 "manifest entry has no separator");
297 297 goto quit;
298 298 }
299 299
300 300 newline = memchr(zero + 1, '\n', end - (zero + 1));
301 301 if (!newline) {
302 302 PyErr_SetString(PyExc_ValueError,
303 303 "manifest contains trailing garbage");
304 304 goto quit;
305 305 }
306 306
307 307 file = PyBytes_FromStringAndSize(start, zero - start);
308 308
309 309 if (!file)
310 310 goto bail;
311 311
312 312 nlen = newline - zero - 1;
313 313
314 314 node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
315 315 if (!node)
316 316 goto bail;
317 317
318 318 if (nlen > 40) {
319 319 flags = PyBytes_FromStringAndSize(zero + 41,
320 320 nlen - 40);
321 321 if (!flags)
322 322 goto bail;
323 323
324 324 if (PyDict_SetItem(fdict, file, flags) == -1)
325 325 goto bail;
326 326 }
327 327
328 328 if (PyDict_SetItem(mfdict, file, node) == -1)
329 329 goto bail;
330 330
331 331 start = newline + 1;
332 332
333 333 Py_XDECREF(flags);
334 334 Py_XDECREF(node);
335 335 Py_XDECREF(file);
336 336 continue;
337 337 bail:
338 338 Py_XDECREF(flags);
339 339 Py_XDECREF(node);
340 340 Py_XDECREF(file);
341 341 goto quit;
342 342 }
343 343
344 344 Py_INCREF(Py_None);
345 345 return Py_None;
346 346 quit:
347 347 return NULL;
348 348 }
349 349
350 350 static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
351 351 int size, int mtime)
352 352 {
353 353 dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
354 354 &dirstateTupleType);
355 355 if (!t)
356 356 return NULL;
357 357 t->state = state;
358 358 t->mode = mode;
359 359 t->size = size;
360 360 t->mtime = mtime;
361 361 return t;
362 362 }
363 363
364 364 static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
365 365 PyObject *kwds)
366 366 {
367 367 /* We do all the initialization here and not a tp_init function because
368 368 * dirstate_tuple is immutable. */
369 369 dirstateTupleObject *t;
370 370 char state;
371 371 int size, mode, mtime;
372 372 if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
373 373 return NULL;
374 374
375 375 t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
376 376 if (!t)
377 377 return NULL;
378 378 t->state = state;
379 379 t->mode = mode;
380 380 t->size = size;
381 381 t->mtime = mtime;
382 382
383 383 return (PyObject *)t;
384 384 }
385 385
386 386 static void dirstate_tuple_dealloc(PyObject *o)
387 387 {
388 388 PyObject_Del(o);
389 389 }
390 390
391 391 static Py_ssize_t dirstate_tuple_length(PyObject *o)
392 392 {
393 393 return 4;
394 394 }
395 395
396 396 static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
397 397 {
398 398 dirstateTupleObject *t = (dirstateTupleObject *)o;
399 399 switch (i) {
400 400 case 0:
401 401 return PyBytes_FromStringAndSize(&t->state, 1);
402 402 case 1:
403 403 return PyInt_FromLong(t->mode);
404 404 case 2:
405 405 return PyInt_FromLong(t->size);
406 406 case 3:
407 407 return PyInt_FromLong(t->mtime);
408 408 default:
409 409 PyErr_SetString(PyExc_IndexError, "index out of range");
410 410 return NULL;
411 411 }
412 412 }
413 413
414 414 static PySequenceMethods dirstate_tuple_sq = {
415 415 dirstate_tuple_length, /* sq_length */
416 416 0, /* sq_concat */
417 417 0, /* sq_repeat */
418 418 dirstate_tuple_item, /* sq_item */
419 419 0, /* sq_ass_item */
420 420 0, /* sq_contains */
421 421 0, /* sq_inplace_concat */
422 422 0 /* sq_inplace_repeat */
423 423 };
424 424
425 425 PyTypeObject dirstateTupleType = {
426 426 PyVarObject_HEAD_INIT(NULL, 0)
427 427 "dirstate_tuple", /* tp_name */
428 428 sizeof(dirstateTupleObject),/* tp_basicsize */
429 429 0, /* tp_itemsize */
430 430 (destructor)dirstate_tuple_dealloc, /* tp_dealloc */
431 431 0, /* tp_print */
432 432 0, /* tp_getattr */
433 433 0, /* tp_setattr */
434 434 0, /* tp_compare */
435 435 0, /* tp_repr */
436 436 0, /* tp_as_number */
437 437 &dirstate_tuple_sq, /* tp_as_sequence */
438 438 0, /* tp_as_mapping */
439 439 0, /* tp_hash */
440 440 0, /* tp_call */
441 441 0, /* tp_str */
442 442 0, /* tp_getattro */
443 443 0, /* tp_setattro */
444 444 0, /* tp_as_buffer */
445 445 Py_TPFLAGS_DEFAULT, /* tp_flags */
446 446 "dirstate tuple", /* tp_doc */
447 447 0, /* tp_traverse */
448 448 0, /* tp_clear */
449 449 0, /* tp_richcompare */
450 450 0, /* tp_weaklistoffset */
451 451 0, /* tp_iter */
452 452 0, /* tp_iternext */
453 453 0, /* tp_methods */
454 454 0, /* tp_members */
455 455 0, /* tp_getset */
456 456 0, /* tp_base */
457 457 0, /* tp_dict */
458 458 0, /* tp_descr_get */
459 459 0, /* tp_descr_set */
460 460 0, /* tp_dictoffset */
461 461 0, /* tp_init */
462 462 0, /* tp_alloc */
463 463 dirstate_tuple_new, /* tp_new */
464 464 };
465 465
466 466 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
467 467 {
468 468 PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
469 469 PyObject *fname = NULL, *cname = NULL, *entry = NULL;
470 470 char state, *cur, *str, *cpos;
471 471 int mode, size, mtime;
472 472 unsigned int flen, len, pos = 40;
473 473 int readlen;
474 474
475 475 if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
476 476 &PyDict_Type, &dmap,
477 477 &PyDict_Type, &cmap,
478 478 &str, &readlen))
479 479 goto quit;
480 480
481 481 len = readlen;
482 482
483 483 /* read parents */
484 484 if (len < 40) {
485 485 PyErr_SetString(
486 486 PyExc_ValueError, "too little data for parents");
487 487 goto quit;
488 488 }
489 489
490 490 parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
491 491 if (!parents)
492 492 goto quit;
493 493
494 494 /* read filenames */
495 495 while (pos >= 40 && pos < len) {
496 496 cur = str + pos;
497 497 /* unpack header */
498 498 state = *cur;
499 499 mode = getbe32(cur + 1);
500 500 size = getbe32(cur + 5);
501 501 mtime = getbe32(cur + 9);
502 502 flen = getbe32(cur + 13);
503 503 pos += 17;
504 504 cur += 17;
505 505 if (flen > len - pos) {
506 506 PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
507 507 goto quit;
508 508 }
509 509
510 510 entry = (PyObject *)make_dirstate_tuple(state, mode, size,
511 511 mtime);
512 512 cpos = memchr(cur, 0, flen);
513 513 if (cpos) {
514 514 fname = PyBytes_FromStringAndSize(cur, cpos - cur);
515 515 cname = PyBytes_FromStringAndSize(cpos + 1,
516 516 flen - (cpos - cur) - 1);
517 517 if (!fname || !cname ||
518 518 PyDict_SetItem(cmap, fname, cname) == -1 ||
519 519 PyDict_SetItem(dmap, fname, entry) == -1)
520 520 goto quit;
521 521 Py_DECREF(cname);
522 522 } else {
523 523 fname = PyBytes_FromStringAndSize(cur, flen);
524 524 if (!fname ||
525 525 PyDict_SetItem(dmap, fname, entry) == -1)
526 526 goto quit;
527 527 }
528 528 Py_DECREF(fname);
529 529 Py_DECREF(entry);
530 530 fname = cname = entry = NULL;
531 531 pos += flen;
532 532 }
533 533
534 534 ret = parents;
535 535 Py_INCREF(ret);
536 536 quit:
537 537 Py_XDECREF(fname);
538 538 Py_XDECREF(cname);
539 539 Py_XDECREF(entry);
540 540 Py_XDECREF(parents);
541 541 return ret;
542 542 }
543 543
544 544 /*
545 545 * Efficiently pack a dirstate object into its on-disk format.
546 546 */
547 547 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
548 548 {
549 549 PyObject *packobj = NULL;
550 550 PyObject *map, *copymap, *pl, *mtime_unset = NULL;
551 551 Py_ssize_t nbytes, pos, l;
552 552 PyObject *k, *v = NULL, *pn;
553 553 char *p, *s;
554 double now;
554 int now;
555 555
556 if (!PyArg_ParseTuple(args, "O!O!Od:pack_dirstate",
556 if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate",
557 557 &PyDict_Type, &map, &PyDict_Type, &copymap,
558 558 &pl, &now))
559 559 return NULL;
560 560
561 561 if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
562 562 PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
563 563 return NULL;
564 564 }
565 565
566 566 /* Figure out how much we need to allocate. */
567 567 for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
568 568 PyObject *c;
569 569 if (!PyString_Check(k)) {
570 570 PyErr_SetString(PyExc_TypeError, "expected string key");
571 571 goto bail;
572 572 }
573 573 nbytes += PyString_GET_SIZE(k) + 17;
574 574 c = PyDict_GetItem(copymap, k);
575 575 if (c) {
576 576 if (!PyString_Check(c)) {
577 577 PyErr_SetString(PyExc_TypeError,
578 578 "expected string key");
579 579 goto bail;
580 580 }
581 581 nbytes += PyString_GET_SIZE(c) + 1;
582 582 }
583 583 }
584 584
585 585 packobj = PyString_FromStringAndSize(NULL, nbytes);
586 586 if (packobj == NULL)
587 587 goto bail;
588 588
589 589 p = PyString_AS_STRING(packobj);
590 590
591 591 pn = PySequence_ITEM(pl, 0);
592 592 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
593 593 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
594 594 goto bail;
595 595 }
596 596 memcpy(p, s, l);
597 597 p += 20;
598 598 pn = PySequence_ITEM(pl, 1);
599 599 if (PyString_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
600 600 PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
601 601 goto bail;
602 602 }
603 603 memcpy(p, s, l);
604 604 p += 20;
605 605
606 606 for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
607 607 dirstateTupleObject *tuple;
608 608 char state;
609 609 uint32_t mode, size, mtime;
610 610 Py_ssize_t len, l;
611 611 PyObject *o;
612 612 char *t;
613 613
614 614 if (!dirstate_tuple_check(v)) {
615 615 PyErr_SetString(PyExc_TypeError,
616 616 "expected a dirstate tuple");
617 617 goto bail;
618 618 }
619 619 tuple = (dirstateTupleObject *)v;
620 620
621 621 state = tuple->state;
622 622 mode = tuple->mode;
623 623 size = tuple->size;
624 624 mtime = tuple->mtime;
625 if (state == 'n' && mtime == (uint32_t)now) {
625 if (state == 'n' && mtime == now) {
626 626 /* See pure/parsers.py:pack_dirstate for why we do
627 627 * this. */
628 628 mtime = -1;
629 629 mtime_unset = (PyObject *)make_dirstate_tuple(
630 630 state, mode, size, mtime);
631 631 if (!mtime_unset)
632 632 goto bail;
633 633 if (PyDict_SetItem(map, k, mtime_unset) == -1)
634 634 goto bail;
635 635 Py_DECREF(mtime_unset);
636 636 mtime_unset = NULL;
637 637 }
638 638 *p++ = state;
639 639 putbe32(mode, p);
640 640 putbe32(size, p + 4);
641 641 putbe32(mtime, p + 8);
642 642 t = p + 12;
643 643 p += 16;
644 644 len = PyString_GET_SIZE(k);
645 645 memcpy(p, PyString_AS_STRING(k), len);
646 646 p += len;
647 647 o = PyDict_GetItem(copymap, k);
648 648 if (o) {
649 649 *p++ = '\0';
650 650 l = PyString_GET_SIZE(o);
651 651 memcpy(p, PyString_AS_STRING(o), l);
652 652 p += l;
653 653 len += l + 1;
654 654 }
655 655 putbe32((uint32_t)len, t);
656 656 }
657 657
658 658 pos = p - PyString_AS_STRING(packobj);
659 659 if (pos != nbytes) {
660 660 PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
661 661 (long)pos, (long)nbytes);
662 662 goto bail;
663 663 }
664 664
665 665 return packobj;
666 666 bail:
667 667 Py_XDECREF(mtime_unset);
668 668 Py_XDECREF(packobj);
669 669 Py_XDECREF(v);
670 670 return NULL;
671 671 }
672 672
673 673 /*
674 674 * A base-16 trie for fast node->rev mapping.
675 675 *
676 676 * Positive value is index of the next node in the trie
677 677 * Negative value is a leaf: -(rev + 1)
678 678 * Zero is empty
679 679 */
680 680 typedef struct {
681 681 int children[16];
682 682 } nodetree;
683 683
684 684 /*
685 685 * This class has two behaviors.
686 686 *
687 687 * When used in a list-like way (with integer keys), we decode an
688 688 * entry in a RevlogNG index file on demand. Our last entry is a
689 689 * sentinel, always a nullid. We have limited support for
690 690 * integer-keyed insert and delete, only at elements right before the
691 691 * sentinel.
692 692 *
693 693 * With string keys, we lazily perform a reverse mapping from node to
694 694 * rev, using a base-16 trie.
695 695 */
696 696 typedef struct {
697 697 PyObject_HEAD
698 698 /* Type-specific fields go here. */
699 699 PyObject *data; /* raw bytes of index */
700 700 PyObject **cache; /* cached tuples */
701 701 const char **offsets; /* populated on demand */
702 702 Py_ssize_t raw_length; /* original number of elements */
703 703 Py_ssize_t length; /* current number of elements */
704 704 PyObject *added; /* populated on demand */
705 705 PyObject *headrevs; /* cache, invalidated on changes */
706 706 PyObject *filteredrevs;/* filtered revs set */
707 707 nodetree *nt; /* base-16 trie */
708 708 unsigned ntlength; /* # nodes in use */
709 709 unsigned ntcapacity; /* # nodes allocated */
710 710 int ntdepth; /* maximum depth of tree */
711 711 int ntsplits; /* # splits performed */
712 712 int ntrev; /* last rev scanned */
713 713 int ntlookups; /* # lookups */
714 714 int ntmisses; /* # lookups that miss the cache */
715 715 int inlined;
716 716 } indexObject;
717 717
718 718 static Py_ssize_t index_length(const indexObject *self)
719 719 {
720 720 if (self->added == NULL)
721 721 return self->length;
722 722 return self->length + PyList_GET_SIZE(self->added);
723 723 }
724 724
725 725 static PyObject *nullentry;
726 726 static const char nullid[20];
727 727
728 728 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
729 729
730 730 #if LONG_MAX == 0x7fffffffL
731 731 static char *tuple_format = "Kiiiiiis#";
732 732 #else
733 733 static char *tuple_format = "kiiiiiis#";
734 734 #endif
735 735
736 736 /* A RevlogNG v1 index entry is 64 bytes long. */
737 737 static const long v1_hdrsize = 64;
738 738
739 739 /*
740 740 * Return a pointer to the beginning of a RevlogNG record.
741 741 */
742 742 static const char *index_deref(indexObject *self, Py_ssize_t pos)
743 743 {
744 744 if (self->inlined && pos > 0) {
745 745 if (self->offsets == NULL) {
746 746 self->offsets = malloc(self->raw_length *
747 747 sizeof(*self->offsets));
748 748 if (self->offsets == NULL)
749 749 return (const char *)PyErr_NoMemory();
750 750 inline_scan(self, self->offsets);
751 751 }
752 752 return self->offsets[pos];
753 753 }
754 754
755 755 return PyString_AS_STRING(self->data) + pos * v1_hdrsize;
756 756 }
757 757
758 758 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
759 759 int *ps, int maxrev)
760 760 {
761 761 if (rev >= self->length - 1) {
762 762 PyObject *tuple = PyList_GET_ITEM(self->added,
763 763 rev - self->length + 1);
764 764 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
765 765 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
766 766 } else {
767 767 const char *data = index_deref(self, rev);
768 768 ps[0] = getbe32(data + 24);
769 769 ps[1] = getbe32(data + 28);
770 770 }
771 771 /* If index file is corrupted, ps[] may point to invalid revisions. So
772 772 * there is a risk of buffer overflow to trust them unconditionally. */
773 773 if (ps[0] > maxrev || ps[1] > maxrev) {
774 774 PyErr_SetString(PyExc_ValueError, "parent out of range");
775 775 return -1;
776 776 }
777 777 return 0;
778 778 }
779 779
780 780
781 781 /*
782 782 * RevlogNG format (all in big endian, data may be inlined):
783 783 * 6 bytes: offset
784 784 * 2 bytes: flags
785 785 * 4 bytes: compressed length
786 786 * 4 bytes: uncompressed length
787 787 * 4 bytes: base revision
788 788 * 4 bytes: link revision
789 789 * 4 bytes: parent 1 revision
790 790 * 4 bytes: parent 2 revision
791 791 * 32 bytes: nodeid (only 20 bytes used)
792 792 */
793 793 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
794 794 {
795 795 uint64_t offset_flags;
796 796 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
797 797 const char *c_node_id;
798 798 const char *data;
799 799 Py_ssize_t length = index_length(self);
800 800 PyObject *entry;
801 801
802 802 if (pos < 0)
803 803 pos += length;
804 804
805 805 if (pos < 0 || pos >= length) {
806 806 PyErr_SetString(PyExc_IndexError, "revlog index out of range");
807 807 return NULL;
808 808 }
809 809
810 810 if (pos == length - 1) {
811 811 Py_INCREF(nullentry);
812 812 return nullentry;
813 813 }
814 814
815 815 if (pos >= self->length - 1) {
816 816 PyObject *obj;
817 817 obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
818 818 Py_INCREF(obj);
819 819 return obj;
820 820 }
821 821
822 822 if (self->cache) {
823 823 if (self->cache[pos]) {
824 824 Py_INCREF(self->cache[pos]);
825 825 return self->cache[pos];
826 826 }
827 827 } else {
828 828 self->cache = calloc(self->raw_length, sizeof(PyObject *));
829 829 if (self->cache == NULL)
830 830 return PyErr_NoMemory();
831 831 }
832 832
833 833 data = index_deref(self, pos);
834 834 if (data == NULL)
835 835 return NULL;
836 836
837 837 offset_flags = getbe32(data + 4);
838 838 if (pos == 0) /* mask out version number for the first entry */
839 839 offset_flags &= 0xFFFF;
840 840 else {
841 841 uint32_t offset_high = getbe32(data);
842 842 offset_flags |= ((uint64_t)offset_high) << 32;
843 843 }
844 844
845 845 comp_len = getbe32(data + 8);
846 846 uncomp_len = getbe32(data + 12);
847 847 base_rev = getbe32(data + 16);
848 848 link_rev = getbe32(data + 20);
849 849 parent_1 = getbe32(data + 24);
850 850 parent_2 = getbe32(data + 28);
851 851 c_node_id = data + 32;
852 852
853 853 entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
854 854 uncomp_len, base_rev, link_rev,
855 855 parent_1, parent_2, c_node_id, 20);
856 856
857 857 if (entry) {
858 858 PyObject_GC_UnTrack(entry);
859 859 Py_INCREF(entry);
860 860 }
861 861
862 862 self->cache[pos] = entry;
863 863
864 864 return entry;
865 865 }
866 866
867 867 /*
868 868 * Return the 20-byte SHA of the node corresponding to the given rev.
869 869 */
870 870 static const char *index_node(indexObject *self, Py_ssize_t pos)
871 871 {
872 872 Py_ssize_t length = index_length(self);
873 873 const char *data;
874 874
875 875 if (pos == length - 1 || pos == INT_MAX)
876 876 return nullid;
877 877
878 878 if (pos >= length)
879 879 return NULL;
880 880
881 881 if (pos >= self->length - 1) {
882 882 PyObject *tuple, *str;
883 883 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
884 884 str = PyTuple_GetItem(tuple, 7);
885 885 return str ? PyString_AS_STRING(str) : NULL;
886 886 }
887 887
888 888 data = index_deref(self, pos);
889 889 return data ? data + 32 : NULL;
890 890 }
891 891
892 892 static int nt_insert(indexObject *self, const char *node, int rev);
893 893
894 894 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
895 895 {
896 896 if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
897 897 return -1;
898 898 if (*nodelen == 20)
899 899 return 0;
900 900 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
901 901 return -1;
902 902 }
903 903
904 904 static PyObject *index_insert(indexObject *self, PyObject *args)
905 905 {
906 906 PyObject *obj;
907 907 char *node;
908 908 int index;
909 909 Py_ssize_t len, nodelen;
910 910
911 911 if (!PyArg_ParseTuple(args, "iO", &index, &obj))
912 912 return NULL;
913 913
914 914 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
915 915 PyErr_SetString(PyExc_TypeError, "8-tuple required");
916 916 return NULL;
917 917 }
918 918
919 919 if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
920 920 return NULL;
921 921
922 922 len = index_length(self);
923 923
924 924 if (index < 0)
925 925 index += len;
926 926
927 927 if (index != len - 1) {
928 928 PyErr_SetString(PyExc_IndexError,
929 929 "insert only supported at index -1");
930 930 return NULL;
931 931 }
932 932
933 933 if (self->added == NULL) {
934 934 self->added = PyList_New(0);
935 935 if (self->added == NULL)
936 936 return NULL;
937 937 }
938 938
939 939 if (PyList_Append(self->added, obj) == -1)
940 940 return NULL;
941 941
942 942 if (self->nt)
943 943 nt_insert(self, node, index);
944 944
945 945 Py_CLEAR(self->headrevs);
946 946 Py_RETURN_NONE;
947 947 }
948 948
949 949 static void _index_clearcaches(indexObject *self)
950 950 {
951 951 if (self->cache) {
952 952 Py_ssize_t i;
953 953
954 954 for (i = 0; i < self->raw_length; i++)
955 955 Py_CLEAR(self->cache[i]);
956 956 free(self->cache);
957 957 self->cache = NULL;
958 958 }
959 959 if (self->offsets) {
960 960 free(self->offsets);
961 961 self->offsets = NULL;
962 962 }
963 963 if (self->nt) {
964 964 free(self->nt);
965 965 self->nt = NULL;
966 966 }
967 967 Py_CLEAR(self->headrevs);
968 968 }
969 969
970 970 static PyObject *index_clearcaches(indexObject *self)
971 971 {
972 972 _index_clearcaches(self);
973 973 self->ntlength = self->ntcapacity = 0;
974 974 self->ntdepth = self->ntsplits = 0;
975 975 self->ntrev = -1;
976 976 self->ntlookups = self->ntmisses = 0;
977 977 Py_RETURN_NONE;
978 978 }
979 979
980 980 static PyObject *index_stats(indexObject *self)
981 981 {
982 982 PyObject *obj = PyDict_New();
983 983 PyObject *t = NULL;
984 984
985 985 if (obj == NULL)
986 986 return NULL;
987 987
988 988 #define istat(__n, __d) \
989 989 t = PyInt_FromSsize_t(self->__n); \
990 990 if (!t) \
991 991 goto bail; \
992 992 if (PyDict_SetItemString(obj, __d, t) == -1) \
993 993 goto bail; \
994 994 Py_DECREF(t);
995 995
996 996 if (self->added) {
997 997 Py_ssize_t len = PyList_GET_SIZE(self->added);
998 998 t = PyInt_FromSsize_t(len);
999 999 if (!t)
1000 1000 goto bail;
1001 1001 if (PyDict_SetItemString(obj, "index entries added", t) == -1)
1002 1002 goto bail;
1003 1003 Py_DECREF(t);
1004 1004 }
1005 1005
1006 1006 if (self->raw_length != self->length - 1)
1007 1007 istat(raw_length, "revs on disk");
1008 1008 istat(length, "revs in memory");
1009 1009 istat(ntcapacity, "node trie capacity");
1010 1010 istat(ntdepth, "node trie depth");
1011 1011 istat(ntlength, "node trie count");
1012 1012 istat(ntlookups, "node trie lookups");
1013 1013 istat(ntmisses, "node trie misses");
1014 1014 istat(ntrev, "node trie last rev scanned");
1015 1015 istat(ntsplits, "node trie splits");
1016 1016
1017 1017 #undef istat
1018 1018
1019 1019 return obj;
1020 1020
1021 1021 bail:
1022 1022 Py_XDECREF(obj);
1023 1023 Py_XDECREF(t);
1024 1024 return NULL;
1025 1025 }
1026 1026
1027 1027 /*
1028 1028 * When we cache a list, we want to be sure the caller can't mutate
1029 1029 * the cached copy.
1030 1030 */
1031 1031 static PyObject *list_copy(PyObject *list)
1032 1032 {
1033 1033 Py_ssize_t len = PyList_GET_SIZE(list);
1034 1034 PyObject *newlist = PyList_New(len);
1035 1035 Py_ssize_t i;
1036 1036
1037 1037 if (newlist == NULL)
1038 1038 return NULL;
1039 1039
1040 1040 for (i = 0; i < len; i++) {
1041 1041 PyObject *obj = PyList_GET_ITEM(list, i);
1042 1042 Py_INCREF(obj);
1043 1043 PyList_SET_ITEM(newlist, i, obj);
1044 1044 }
1045 1045
1046 1046 return newlist;
1047 1047 }
1048 1048
1049 1049 static int check_filter(PyObject *filter, Py_ssize_t arg) {
1050 1050 if (filter) {
1051 1051 PyObject *arglist, *result;
1052 1052 int isfiltered;
1053 1053
1054 1054 arglist = Py_BuildValue("(n)", arg);
1055 1055 if (!arglist) {
1056 1056 return -1;
1057 1057 }
1058 1058
1059 1059 result = PyEval_CallObject(filter, arglist);
1060 1060 Py_DECREF(arglist);
1061 1061 if (!result) {
1062 1062 return -1;
1063 1063 }
1064 1064
1065 1065 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
1066 1066 * same as this function, so we can just return it directly.*/
1067 1067 isfiltered = PyObject_IsTrue(result);
1068 1068 Py_DECREF(result);
1069 1069 return isfiltered;
1070 1070 } else {
1071 1071 return 0;
1072 1072 }
1073 1073 }
1074 1074
1075 1075 static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
1076 1076 Py_ssize_t marker, char *phases)
1077 1077 {
1078 1078 PyObject *iter = NULL;
1079 1079 PyObject *iter_item = NULL;
1080 1080 Py_ssize_t min_idx = index_length(self) + 1;
1081 1081 long iter_item_long;
1082 1082
1083 1083 if (PyList_GET_SIZE(list) != 0) {
1084 1084 iter = PyObject_GetIter(list);
1085 1085 if (iter == NULL)
1086 1086 return -2;
1087 1087 while ((iter_item = PyIter_Next(iter)))
1088 1088 {
1089 1089 iter_item_long = PyInt_AS_LONG(iter_item);
1090 1090 Py_DECREF(iter_item);
1091 1091 if (iter_item_long < min_idx)
1092 1092 min_idx = iter_item_long;
1093 1093 phases[iter_item_long] = marker;
1094 1094 }
1095 1095 Py_DECREF(iter);
1096 1096 }
1097 1097
1098 1098 return min_idx;
1099 1099 }
1100 1100
1101 1101 static inline void set_phase_from_parents(char *phases, int parent_1,
1102 1102 int parent_2, Py_ssize_t i)
1103 1103 {
1104 1104 if (parent_1 >= 0 && phases[parent_1] > phases[i])
1105 1105 phases[i] = phases[parent_1];
1106 1106 if (parent_2 >= 0 && phases[parent_2] > phases[i])
1107 1107 phases[i] = phases[parent_2];
1108 1108 }
1109 1109
1110 1110 static PyObject *reachableroots2(indexObject *self, PyObject *args)
1111 1111 {
1112 1112
1113 1113 /* Input */
1114 1114 long minroot;
1115 1115 PyObject *includepatharg = NULL;
1116 1116 int includepath = 0;
1117 1117 /* heads and roots are lists */
1118 1118 PyObject *heads = NULL;
1119 1119 PyObject *roots = NULL;
1120 1120 PyObject *reachable = NULL;
1121 1121
1122 1122 PyObject *val;
1123 1123 Py_ssize_t len = index_length(self) - 1;
1124 1124 long revnum;
1125 1125 Py_ssize_t k;
1126 1126 Py_ssize_t i;
1127 1127 Py_ssize_t l;
1128 1128 int r;
1129 1129 int parents[2];
1130 1130
1131 1131 /* Internal data structure:
1132 1132 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
1133 1133 * revstates: array of length len+1 (all revs + nullrev) */
1134 1134 int *tovisit = NULL;
1135 1135 long lentovisit = 0;
1136 1136 enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
1137 1137 char *revstates = NULL;
1138 1138
1139 1139 /* Get arguments */
1140 1140 if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
1141 1141 &PyList_Type, &roots,
1142 1142 &PyBool_Type, &includepatharg))
1143 1143 goto bail;
1144 1144
1145 1145 if (includepatharg == Py_True)
1146 1146 includepath = 1;
1147 1147
1148 1148 /* Initialize return set */
1149 1149 reachable = PyList_New(0);
1150 1150 if (reachable == NULL)
1151 1151 goto bail;
1152 1152
1153 1153 /* Initialize internal datastructures */
1154 1154 tovisit = (int *)malloc((len + 1) * sizeof(int));
1155 1155 if (tovisit == NULL) {
1156 1156 PyErr_NoMemory();
1157 1157 goto bail;
1158 1158 }
1159 1159
1160 1160 revstates = (char *)calloc(len + 1, 1);
1161 1161 if (revstates == NULL) {
1162 1162 PyErr_NoMemory();
1163 1163 goto bail;
1164 1164 }
1165 1165
1166 1166 l = PyList_GET_SIZE(roots);
1167 1167 for (i = 0; i < l; i++) {
1168 1168 revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
1169 1169 if (revnum == -1 && PyErr_Occurred())
1170 1170 goto bail;
1171 1171 /* If root is out of range, e.g. wdir(), it must be unreachable
1172 1172 * from heads. So we can just ignore it. */
1173 1173 if (revnum + 1 < 0 || revnum + 1 >= len + 1)
1174 1174 continue;
1175 1175 revstates[revnum + 1] |= RS_ROOT;
1176 1176 }
1177 1177
1178 1178 /* Populate tovisit with all the heads */
1179 1179 l = PyList_GET_SIZE(heads);
1180 1180 for (i = 0; i < l; i++) {
1181 1181 revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
1182 1182 if (revnum == -1 && PyErr_Occurred())
1183 1183 goto bail;
1184 1184 if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
1185 1185 PyErr_SetString(PyExc_IndexError, "head out of range");
1186 1186 goto bail;
1187 1187 }
1188 1188 if (!(revstates[revnum + 1] & RS_SEEN)) {
1189 1189 tovisit[lentovisit++] = (int)revnum;
1190 1190 revstates[revnum + 1] |= RS_SEEN;
1191 1191 }
1192 1192 }
1193 1193
1194 1194 /* Visit the tovisit list and find the reachable roots */
1195 1195 k = 0;
1196 1196 while (k < lentovisit) {
1197 1197 /* Add the node to reachable if it is a root*/
1198 1198 revnum = tovisit[k++];
1199 1199 if (revstates[revnum + 1] & RS_ROOT) {
1200 1200 revstates[revnum + 1] |= RS_REACHABLE;
1201 1201 val = PyInt_FromLong(revnum);
1202 1202 if (val == NULL)
1203 1203 goto bail;
1204 1204 r = PyList_Append(reachable, val);
1205 1205 Py_DECREF(val);
1206 1206 if (r < 0)
1207 1207 goto bail;
1208 1208 if (includepath == 0)
1209 1209 continue;
1210 1210 }
1211 1211
1212 1212 /* Add its parents to the list of nodes to visit */
1213 1213 if (revnum == -1)
1214 1214 continue;
1215 1215 r = index_get_parents(self, revnum, parents, (int)len - 1);
1216 1216 if (r < 0)
1217 1217 goto bail;
1218 1218 for (i = 0; i < 2; i++) {
1219 1219 if (!(revstates[parents[i] + 1] & RS_SEEN)
1220 1220 && parents[i] >= minroot) {
1221 1221 tovisit[lentovisit++] = parents[i];
1222 1222 revstates[parents[i] + 1] |= RS_SEEN;
1223 1223 }
1224 1224 }
1225 1225 }
1226 1226
1227 1227 /* Find all the nodes in between the roots we found and the heads
1228 1228 * and add them to the reachable set */
1229 1229 if (includepath == 1) {
1230 1230 long minidx = minroot;
1231 1231 if (minidx < 0)
1232 1232 minidx = 0;
1233 1233 for (i = minidx; i < len; i++) {
1234 1234 if (!(revstates[i + 1] & RS_SEEN))
1235 1235 continue;
1236 1236 r = index_get_parents(self, i, parents, (int)len - 1);
1237 1237 /* Corrupted index file, error is set from
1238 1238 * index_get_parents */
1239 1239 if (r < 0)
1240 1240 goto bail;
1241 1241 if (((revstates[parents[0] + 1] |
1242 1242 revstates[parents[1] + 1]) & RS_REACHABLE)
1243 1243 && !(revstates[i + 1] & RS_REACHABLE)) {
1244 1244 revstates[i + 1] |= RS_REACHABLE;
1245 1245 val = PyInt_FromLong(i);
1246 1246 if (val == NULL)
1247 1247 goto bail;
1248 1248 r = PyList_Append(reachable, val);
1249 1249 Py_DECREF(val);
1250 1250 if (r < 0)
1251 1251 goto bail;
1252 1252 }
1253 1253 }
1254 1254 }
1255 1255
1256 1256 free(revstates);
1257 1257 free(tovisit);
1258 1258 return reachable;
1259 1259 bail:
1260 1260 Py_XDECREF(reachable);
1261 1261 free(revstates);
1262 1262 free(tovisit);
1263 1263 return NULL;
1264 1264 }
1265 1265
1266 1266 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
1267 1267 {
1268 1268 PyObject *roots = Py_None;
1269 1269 PyObject *ret = NULL;
1270 1270 PyObject *phaseslist = NULL;
1271 1271 PyObject *phaseroots = NULL;
1272 1272 PyObject *phaseset = NULL;
1273 1273 PyObject *phasessetlist = NULL;
1274 1274 PyObject *rev = NULL;
1275 1275 Py_ssize_t len = index_length(self) - 1;
1276 1276 Py_ssize_t numphase = 0;
1277 1277 Py_ssize_t minrevallphases = 0;
1278 1278 Py_ssize_t minrevphase = 0;
1279 1279 Py_ssize_t i = 0;
1280 1280 char *phases = NULL;
1281 1281 long phase;
1282 1282
1283 1283 if (!PyArg_ParseTuple(args, "O", &roots))
1284 1284 goto release_none;
1285 1285 if (roots == NULL || !PyList_Check(roots))
1286 1286 goto release_none;
1287 1287
1288 1288 phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
1289 1289 if (phases == NULL)
1290 1290 goto release_none;
1291 1291 /* Put the phase information of all the roots in phases */
1292 1292 numphase = PyList_GET_SIZE(roots)+1;
1293 1293 minrevallphases = len + 1;
1294 1294 phasessetlist = PyList_New(numphase);
1295 1295 if (phasessetlist == NULL)
1296 1296 goto release_none;
1297 1297
1298 1298 PyList_SET_ITEM(phasessetlist, 0, Py_None);
1299 1299 Py_INCREF(Py_None);
1300 1300
1301 1301 for (i = 0; i < numphase-1; i++) {
1302 1302 phaseroots = PyList_GET_ITEM(roots, i);
1303 1303 phaseset = PySet_New(NULL);
1304 1304 if (phaseset == NULL)
1305 1305 goto release_phasesetlist;
1306 1306 PyList_SET_ITEM(phasessetlist, i+1, phaseset);
1307 1307 if (!PyList_Check(phaseroots))
1308 1308 goto release_phasesetlist;
1309 1309 minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
1310 1310 if (minrevphase == -2) /* Error from add_roots_get_min */
1311 1311 goto release_phasesetlist;
1312 1312 minrevallphases = MIN(minrevallphases, minrevphase);
1313 1313 }
1314 1314 /* Propagate the phase information from the roots to the revs */
1315 1315 if (minrevallphases != -1) {
1316 1316 int parents[2];
1317 1317 for (i = minrevallphases; i < len; i++) {
1318 1318 if (index_get_parents(self, i, parents,
1319 1319 (int)len - 1) < 0)
1320 1320 goto release_phasesetlist;
1321 1321 set_phase_from_parents(phases, parents[0], parents[1], i);
1322 1322 }
1323 1323 }
1324 1324 /* Transform phase list to a python list */
1325 1325 phaseslist = PyList_New(len);
1326 1326 if (phaseslist == NULL)
1327 1327 goto release_phasesetlist;
1328 1328 for (i = 0; i < len; i++) {
1329 1329 phase = phases[i];
1330 1330 /* We only store the sets of phase for non public phase, the public phase
1331 1331 * is computed as a difference */
1332 1332 if (phase != 0) {
1333 1333 phaseset = PyList_GET_ITEM(phasessetlist, phase);
1334 1334 rev = PyInt_FromLong(i);
1335 1335 PySet_Add(phaseset, rev);
1336 1336 Py_XDECREF(rev);
1337 1337 }
1338 1338 PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phase));
1339 1339 }
1340 1340 ret = PyList_New(2);
1341 1341 if (ret == NULL)
1342 1342 goto release_phaseslist;
1343 1343
1344 1344 PyList_SET_ITEM(ret, 0, phaseslist);
1345 1345 PyList_SET_ITEM(ret, 1, phasessetlist);
1346 1346 /* We don't release phaseslist and phasessetlist as we return them to
1347 1347 * python */
1348 1348 goto release_phases;
1349 1349
1350 1350 release_phaseslist:
1351 1351 Py_XDECREF(phaseslist);
1352 1352 release_phasesetlist:
1353 1353 Py_XDECREF(phasessetlist);
1354 1354 release_phases:
1355 1355 free(phases);
1356 1356 release_none:
1357 1357 return ret;
1358 1358 }
1359 1359
1360 1360 static PyObject *index_headrevs(indexObject *self, PyObject *args)
1361 1361 {
1362 1362 Py_ssize_t i, j, len;
1363 1363 char *nothead = NULL;
1364 1364 PyObject *heads = NULL;
1365 1365 PyObject *filter = NULL;
1366 1366 PyObject *filteredrevs = Py_None;
1367 1367
1368 1368 if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
1369 1369 return NULL;
1370 1370 }
1371 1371
1372 1372 if (self->headrevs && filteredrevs == self->filteredrevs)
1373 1373 return list_copy(self->headrevs);
1374 1374
1375 1375 Py_DECREF(self->filteredrevs);
1376 1376 self->filteredrevs = filteredrevs;
1377 1377 Py_INCREF(filteredrevs);
1378 1378
1379 1379 if (filteredrevs != Py_None) {
1380 1380 filter = PyObject_GetAttrString(filteredrevs, "__contains__");
1381 1381 if (!filter) {
1382 1382 PyErr_SetString(PyExc_TypeError,
1383 1383 "filteredrevs has no attribute __contains__");
1384 1384 goto bail;
1385 1385 }
1386 1386 }
1387 1387
1388 1388 len = index_length(self) - 1;
1389 1389 heads = PyList_New(0);
1390 1390 if (heads == NULL)
1391 1391 goto bail;
1392 1392 if (len == 0) {
1393 1393 PyObject *nullid = PyInt_FromLong(-1);
1394 1394 if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
1395 1395 Py_XDECREF(nullid);
1396 1396 goto bail;
1397 1397 }
1398 1398 goto done;
1399 1399 }
1400 1400
1401 1401 nothead = calloc(len, 1);
1402 1402 if (nothead == NULL)
1403 1403 goto bail;
1404 1404
1405 1405 for (i = 0; i < len; i++) {
1406 1406 int isfiltered;
1407 1407 int parents[2];
1408 1408
1409 1409 isfiltered = check_filter(filter, i);
1410 1410 if (isfiltered == -1) {
1411 1411 PyErr_SetString(PyExc_TypeError,
1412 1412 "unable to check filter");
1413 1413 goto bail;
1414 1414 }
1415 1415
1416 1416 if (isfiltered) {
1417 1417 nothead[i] = 1;
1418 1418 continue;
1419 1419 }
1420 1420
1421 1421 if (index_get_parents(self, i, parents, (int)len - 1) < 0)
1422 1422 goto bail;
1423 1423 for (j = 0; j < 2; j++) {
1424 1424 if (parents[j] >= 0)
1425 1425 nothead[parents[j]] = 1;
1426 1426 }
1427 1427 }
1428 1428
1429 1429 for (i = 0; i < len; i++) {
1430 1430 PyObject *head;
1431 1431
1432 1432 if (nothead[i])
1433 1433 continue;
1434 1434 head = PyInt_FromSsize_t(i);
1435 1435 if (head == NULL || PyList_Append(heads, head) == -1) {
1436 1436 Py_XDECREF(head);
1437 1437 goto bail;
1438 1438 }
1439 1439 }
1440 1440
1441 1441 done:
1442 1442 self->headrevs = heads;
1443 1443 Py_XDECREF(filter);
1444 1444 free(nothead);
1445 1445 return list_copy(self->headrevs);
1446 1446 bail:
1447 1447 Py_XDECREF(filter);
1448 1448 Py_XDECREF(heads);
1449 1449 free(nothead);
1450 1450 return NULL;
1451 1451 }
1452 1452
1453 1453 static inline int nt_level(const char *node, Py_ssize_t level)
1454 1454 {
1455 1455 int v = node[level>>1];
1456 1456 if (!(level & 1))
1457 1457 v >>= 4;
1458 1458 return v & 0xf;
1459 1459 }
1460 1460
1461 1461 /*
1462 1462 * Return values:
1463 1463 *
1464 1464 * -4: match is ambiguous (multiple candidates)
1465 1465 * -2: not found
1466 1466 * rest: valid rev
1467 1467 */
1468 1468 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
1469 1469 int hex)
1470 1470 {
1471 1471 int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
1472 1472 int level, maxlevel, off;
1473 1473
1474 1474 if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
1475 1475 return -1;
1476 1476
1477 1477 if (self->nt == NULL)
1478 1478 return -2;
1479 1479
1480 1480 if (hex)
1481 1481 maxlevel = nodelen > 40 ? 40 : (int)nodelen;
1482 1482 else
1483 1483 maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);
1484 1484
1485 1485 for (level = off = 0; level < maxlevel; level++) {
1486 1486 int k = getnybble(node, level);
1487 1487 nodetree *n = &self->nt[off];
1488 1488 int v = n->children[k];
1489 1489
1490 1490 if (v < 0) {
1491 1491 const char *n;
1492 1492 Py_ssize_t i;
1493 1493
1494 1494 v = -(v + 1);
1495 1495 n = index_node(self, v);
1496 1496 if (n == NULL)
1497 1497 return -2;
1498 1498 for (i = level; i < maxlevel; i++)
1499 1499 if (getnybble(node, i) != nt_level(n, i))
1500 1500 return -2;
1501 1501 return v;
1502 1502 }
1503 1503 if (v == 0)
1504 1504 return -2;
1505 1505 off = v;
1506 1506 }
1507 1507 /* multiple matches against an ambiguous prefix */
1508 1508 return -4;
1509 1509 }
1510 1510
1511 1511 static int nt_new(indexObject *self)
1512 1512 {
1513 1513 if (self->ntlength == self->ntcapacity) {
1514 1514 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1515 1515 PyErr_SetString(PyExc_MemoryError,
1516 1516 "overflow in nt_new");
1517 1517 return -1;
1518 1518 }
1519 1519 self->ntcapacity *= 2;
1520 1520 self->nt = realloc(self->nt,
1521 1521 self->ntcapacity * sizeof(nodetree));
1522 1522 if (self->nt == NULL) {
1523 1523 PyErr_SetString(PyExc_MemoryError, "out of memory");
1524 1524 return -1;
1525 1525 }
1526 1526 memset(&self->nt[self->ntlength], 0,
1527 1527 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1528 1528 }
1529 1529 return self->ntlength++;
1530 1530 }
1531 1531
1532 1532 static int nt_insert(indexObject *self, const char *node, int rev)
1533 1533 {
1534 1534 int level = 0;
1535 1535 int off = 0;
1536 1536
1537 1537 while (level < 40) {
1538 1538 int k = nt_level(node, level);
1539 1539 nodetree *n;
1540 1540 int v;
1541 1541
1542 1542 n = &self->nt[off];
1543 1543 v = n->children[k];
1544 1544
1545 1545 if (v == 0) {
1546 1546 n->children[k] = -rev - 1;
1547 1547 return 0;
1548 1548 }
1549 1549 if (v < 0) {
1550 1550 const char *oldnode = index_node(self, -(v + 1));
1551 1551 int noff;
1552 1552
1553 1553 if (!oldnode || !memcmp(oldnode, node, 20)) {
1554 1554 n->children[k] = -rev - 1;
1555 1555 return 0;
1556 1556 }
1557 1557 noff = nt_new(self);
1558 1558 if (noff == -1)
1559 1559 return -1;
1560 1560 /* self->nt may have been changed by realloc */
1561 1561 self->nt[off].children[k] = noff;
1562 1562 off = noff;
1563 1563 n = &self->nt[off];
1564 1564 n->children[nt_level(oldnode, ++level)] = v;
1565 1565 if (level > self->ntdepth)
1566 1566 self->ntdepth = level;
1567 1567 self->ntsplits += 1;
1568 1568 } else {
1569 1569 level += 1;
1570 1570 off = v;
1571 1571 }
1572 1572 }
1573 1573
1574 1574 return -1;
1575 1575 }
1576 1576
1577 1577 static int nt_init(indexObject *self)
1578 1578 {
1579 1579 if (self->nt == NULL) {
1580 1580 if (self->raw_length > INT_MAX / sizeof(nodetree)) {
1581 1581 PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
1582 1582 return -1;
1583 1583 }
1584 1584 self->ntcapacity = self->raw_length < 4
1585 1585 ? 4 : (int)self->raw_length / 2;
1586 1586
1587 1587 self->nt = calloc(self->ntcapacity, sizeof(nodetree));
1588 1588 if (self->nt == NULL) {
1589 1589 PyErr_NoMemory();
1590 1590 return -1;
1591 1591 }
1592 1592 self->ntlength = 1;
1593 1593 self->ntrev = (int)index_length(self) - 1;
1594 1594 self->ntlookups = 1;
1595 1595 self->ntmisses = 0;
1596 1596 if (nt_insert(self, nullid, INT_MAX) == -1)
1597 1597 return -1;
1598 1598 }
1599 1599 return 0;
1600 1600 }
1601 1601
1602 1602 /*
1603 1603 * Return values:
1604 1604 *
1605 1605 * -3: error (exception set)
1606 1606 * -2: not found (no exception set)
1607 1607 * rest: valid rev
1608 1608 */
1609 1609 static int index_find_node(indexObject *self,
1610 1610 const char *node, Py_ssize_t nodelen)
1611 1611 {
1612 1612 int rev;
1613 1613
1614 1614 self->ntlookups++;
1615 1615 rev = nt_find(self, node, nodelen, 0);
1616 1616 if (rev >= -1)
1617 1617 return rev;
1618 1618
1619 1619 if (nt_init(self) == -1)
1620 1620 return -3;
1621 1621
1622 1622 /*
1623 1623 * For the first handful of lookups, we scan the entire index,
1624 1624 * and cache only the matching nodes. This optimizes for cases
1625 1625 * like "hg tip", where only a few nodes are accessed.
1626 1626 *
1627 1627 * After that, we cache every node we visit, using a single
1628 1628 * scan amortized over multiple lookups. This gives the best
1629 1629 * bulk performance, e.g. for "hg log".
1630 1630 */
1631 1631 if (self->ntmisses++ < 4) {
1632 1632 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1633 1633 const char *n = index_node(self, rev);
1634 1634 if (n == NULL)
1635 1635 return -2;
1636 1636 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1637 1637 if (nt_insert(self, n, rev) == -1)
1638 1638 return -3;
1639 1639 break;
1640 1640 }
1641 1641 }
1642 1642 } else {
1643 1643 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1644 1644 const char *n = index_node(self, rev);
1645 1645 if (n == NULL) {
1646 1646 self->ntrev = rev + 1;
1647 1647 return -2;
1648 1648 }
1649 1649 if (nt_insert(self, n, rev) == -1) {
1650 1650 self->ntrev = rev + 1;
1651 1651 return -3;
1652 1652 }
1653 1653 if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
1654 1654 break;
1655 1655 }
1656 1656 }
1657 1657 self->ntrev = rev;
1658 1658 }
1659 1659
1660 1660 if (rev >= 0)
1661 1661 return rev;
1662 1662 return -2;
1663 1663 }
1664 1664
1665 1665 static void raise_revlog_error(void)
1666 1666 {
1667 1667 PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
1668 1668
1669 1669 mod = PyImport_ImportModule("mercurial.error");
1670 1670 if (mod == NULL) {
1671 1671 goto cleanup;
1672 1672 }
1673 1673
1674 1674 dict = PyModule_GetDict(mod);
1675 1675 if (dict == NULL) {
1676 1676 goto cleanup;
1677 1677 }
1678 1678 Py_INCREF(dict);
1679 1679
1680 1680 errclass = PyDict_GetItemString(dict, "RevlogError");
1681 1681 if (errclass == NULL) {
1682 1682 PyErr_SetString(PyExc_SystemError,
1683 1683 "could not find RevlogError");
1684 1684 goto cleanup;
1685 1685 }
1686 1686
1687 1687 /* value of exception is ignored by callers */
1688 1688 PyErr_SetString(errclass, "RevlogError");
1689 1689
1690 1690 cleanup:
1691 1691 Py_XDECREF(dict);
1692 1692 Py_XDECREF(mod);
1693 1693 }
1694 1694
1695 1695 static PyObject *index_getitem(indexObject *self, PyObject *value)
1696 1696 {
1697 1697 char *node;
1698 1698 Py_ssize_t nodelen;
1699 1699 int rev;
1700 1700
1701 1701 if (PyInt_Check(value))
1702 1702 return index_get(self, PyInt_AS_LONG(value));
1703 1703
1704 1704 if (node_check(value, &node, &nodelen) == -1)
1705 1705 return NULL;
1706 1706 rev = index_find_node(self, node, nodelen);
1707 1707 if (rev >= -1)
1708 1708 return PyInt_FromLong(rev);
1709 1709 if (rev == -2)
1710 1710 raise_revlog_error();
1711 1711 return NULL;
1712 1712 }
1713 1713
1714 1714 static int nt_partialmatch(indexObject *self, const char *node,
1715 1715 Py_ssize_t nodelen)
1716 1716 {
1717 1717 int rev;
1718 1718
1719 1719 if (nt_init(self) == -1)
1720 1720 return -3;
1721 1721
1722 1722 if (self->ntrev > 0) {
1723 1723 /* ensure that the radix tree is fully populated */
1724 1724 for (rev = self->ntrev - 1; rev >= 0; rev--) {
1725 1725 const char *n = index_node(self, rev);
1726 1726 if (n == NULL)
1727 1727 return -2;
1728 1728 if (nt_insert(self, n, rev) == -1)
1729 1729 return -3;
1730 1730 }
1731 1731 self->ntrev = rev;
1732 1732 }
1733 1733
1734 1734 return nt_find(self, node, nodelen, 1);
1735 1735 }
1736 1736
1737 1737 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1738 1738 {
1739 1739 const char *fullnode;
1740 1740 int nodelen;
1741 1741 char *node;
1742 1742 int rev, i;
1743 1743
1744 1744 if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
1745 1745 return NULL;
1746 1746
1747 1747 if (nodelen < 4) {
1748 1748 PyErr_SetString(PyExc_ValueError, "key too short");
1749 1749 return NULL;
1750 1750 }
1751 1751
1752 1752 if (nodelen > 40) {
1753 1753 PyErr_SetString(PyExc_ValueError, "key too long");
1754 1754 return NULL;
1755 1755 }
1756 1756
1757 1757 for (i = 0; i < nodelen; i++)
1758 1758 hexdigit(node, i);
1759 1759 if (PyErr_Occurred()) {
1760 1760 /* input contains non-hex characters */
1761 1761 PyErr_Clear();
1762 1762 Py_RETURN_NONE;
1763 1763 }
1764 1764
1765 1765 rev = nt_partialmatch(self, node, nodelen);
1766 1766
1767 1767 switch (rev) {
1768 1768 case -4:
1769 1769 raise_revlog_error();
1770 1770 case -3:
1771 1771 return NULL;
1772 1772 case -2:
1773 1773 Py_RETURN_NONE;
1774 1774 case -1:
1775 1775 return PyString_FromStringAndSize(nullid, 20);
1776 1776 }
1777 1777
1778 1778 fullnode = index_node(self, rev);
1779 1779 if (fullnode == NULL) {
1780 1780 PyErr_Format(PyExc_IndexError,
1781 1781 "could not access rev %d", rev);
1782 1782 return NULL;
1783 1783 }
1784 1784 return PyString_FromStringAndSize(fullnode, 20);
1785 1785 }
1786 1786
1787 1787 static PyObject *index_m_get(indexObject *self, PyObject *args)
1788 1788 {
1789 1789 Py_ssize_t nodelen;
1790 1790 PyObject *val;
1791 1791 char *node;
1792 1792 int rev;
1793 1793
1794 1794 if (!PyArg_ParseTuple(args, "O", &val))
1795 1795 return NULL;
1796 1796 if (node_check(val, &node, &nodelen) == -1)
1797 1797 return NULL;
1798 1798 rev = index_find_node(self, node, nodelen);
1799 1799 if (rev == -3)
1800 1800 return NULL;
1801 1801 if (rev == -2)
1802 1802 Py_RETURN_NONE;
1803 1803 return PyInt_FromLong(rev);
1804 1804 }
1805 1805
1806 1806 static int index_contains(indexObject *self, PyObject *value)
1807 1807 {
1808 1808 char *node;
1809 1809 Py_ssize_t nodelen;
1810 1810
1811 1811 if (PyInt_Check(value)) {
1812 1812 long rev = PyInt_AS_LONG(value);
1813 1813 return rev >= -1 && rev < index_length(self);
1814 1814 }
1815 1815
1816 1816 if (node_check(value, &node, &nodelen) == -1)
1817 1817 return -1;
1818 1818
1819 1819 switch (index_find_node(self, node, nodelen)) {
1820 1820 case -3:
1821 1821 return -1;
1822 1822 case -2:
1823 1823 return 0;
1824 1824 default:
1825 1825 return 1;
1826 1826 }
1827 1827 }
1828 1828
1829 1829 typedef uint64_t bitmask;
1830 1830
1831 1831 /*
1832 1832 * Given a disjoint set of revs, return all candidates for the
1833 1833 * greatest common ancestor. In revset notation, this is the set
1834 1834 * "heads(::a and ::b and ...)"
1835 1835 */
1836 1836 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1837 1837 int revcount)
1838 1838 {
1839 1839 const bitmask allseen = (1ull << revcount) - 1;
1840 1840 const bitmask poison = 1ull << revcount;
1841 1841 PyObject *gca = PyList_New(0);
1842 1842 int i, v, interesting;
1843 1843 int maxrev = -1;
1844 1844 bitmask sp;
1845 1845 bitmask *seen;
1846 1846
1847 1847 if (gca == NULL)
1848 1848 return PyErr_NoMemory();
1849 1849
1850 1850 for (i = 0; i < revcount; i++) {
1851 1851 if (revs[i] > maxrev)
1852 1852 maxrev = revs[i];
1853 1853 }
1854 1854
1855 1855 seen = calloc(sizeof(*seen), maxrev + 1);
1856 1856 if (seen == NULL) {
1857 1857 Py_DECREF(gca);
1858 1858 return PyErr_NoMemory();
1859 1859 }
1860 1860
1861 1861 for (i = 0; i < revcount; i++)
1862 1862 seen[revs[i]] = 1ull << i;
1863 1863
1864 1864 interesting = revcount;
1865 1865
1866 1866 for (v = maxrev; v >= 0 && interesting; v--) {
1867 1867 bitmask sv = seen[v];
1868 1868 int parents[2];
1869 1869
1870 1870 if (!sv)
1871 1871 continue;
1872 1872
1873 1873 if (sv < poison) {
1874 1874 interesting -= 1;
1875 1875 if (sv == allseen) {
1876 1876 PyObject *obj = PyInt_FromLong(v);
1877 1877 if (obj == NULL)
1878 1878 goto bail;
1879 1879 if (PyList_Append(gca, obj) == -1) {
1880 1880 Py_DECREF(obj);
1881 1881 goto bail;
1882 1882 }
1883 1883 sv |= poison;
1884 1884 for (i = 0; i < revcount; i++) {
1885 1885 if (revs[i] == v)
1886 1886 goto done;
1887 1887 }
1888 1888 }
1889 1889 }
1890 1890 if (index_get_parents(self, v, parents, maxrev) < 0)
1891 1891 goto bail;
1892 1892
1893 1893 for (i = 0; i < 2; i++) {
1894 1894 int p = parents[i];
1895 1895 if (p == -1)
1896 1896 continue;
1897 1897 sp = seen[p];
1898 1898 if (sv < poison) {
1899 1899 if (sp == 0) {
1900 1900 seen[p] = sv;
1901 1901 interesting++;
1902 1902 }
1903 1903 else if (sp != sv)
1904 1904 seen[p] |= sv;
1905 1905 } else {
1906 1906 if (sp && sp < poison)
1907 1907 interesting--;
1908 1908 seen[p] = sv;
1909 1909 }
1910 1910 }
1911 1911 }
1912 1912
1913 1913 done:
1914 1914 free(seen);
1915 1915 return gca;
1916 1916 bail:
1917 1917 free(seen);
1918 1918 Py_XDECREF(gca);
1919 1919 return NULL;
1920 1920 }
1921 1921
1922 1922 /*
1923 1923 * Given a disjoint set of revs, return the subset with the longest
1924 1924 * path to the root.
1925 1925 */
1926 1926 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1927 1927 {
1928 1928 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1929 1929 static const Py_ssize_t capacity = 24;
1930 1930 int *depth, *interesting = NULL;
1931 1931 int i, j, v, ninteresting;
1932 1932 PyObject *dict = NULL, *keys = NULL;
1933 1933 long *seen = NULL;
1934 1934 int maxrev = -1;
1935 1935 long final;
1936 1936
1937 1937 if (revcount > capacity) {
1938 1938 PyErr_Format(PyExc_OverflowError,
1939 1939 "bitset size (%ld) > capacity (%ld)",
1940 1940 (long)revcount, (long)capacity);
1941 1941 return NULL;
1942 1942 }
1943 1943
1944 1944 for (i = 0; i < revcount; i++) {
1945 1945 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1946 1946 if (n > maxrev)
1947 1947 maxrev = n;
1948 1948 }
1949 1949
1950 1950 depth = calloc(sizeof(*depth), maxrev + 1);
1951 1951 if (depth == NULL)
1952 1952 return PyErr_NoMemory();
1953 1953
1954 1954 seen = calloc(sizeof(*seen), maxrev + 1);
1955 1955 if (seen == NULL) {
1956 1956 PyErr_NoMemory();
1957 1957 goto bail;
1958 1958 }
1959 1959
1960 1960 interesting = calloc(sizeof(*interesting), 2 << revcount);
1961 1961 if (interesting == NULL) {
1962 1962 PyErr_NoMemory();
1963 1963 goto bail;
1964 1964 }
1965 1965
1966 1966 if (PyList_Sort(revs) == -1)
1967 1967 goto bail;
1968 1968
1969 1969 for (i = 0; i < revcount; i++) {
1970 1970 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
1971 1971 long b = 1l << i;
1972 1972 depth[n] = 1;
1973 1973 seen[n] = b;
1974 1974 interesting[b] = 1;
1975 1975 }
1976 1976
1977 1977 ninteresting = (int)revcount;
1978 1978
1979 1979 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
1980 1980 int dv = depth[v];
1981 1981 int parents[2];
1982 1982 long sv;
1983 1983
1984 1984 if (dv == 0)
1985 1985 continue;
1986 1986
1987 1987 sv = seen[v];
1988 1988 if (index_get_parents(self, v, parents, maxrev) < 0)
1989 1989 goto bail;
1990 1990
1991 1991 for (i = 0; i < 2; i++) {
1992 1992 int p = parents[i];
1993 1993 long nsp, sp;
1994 1994 int dp;
1995 1995
1996 1996 if (p == -1)
1997 1997 continue;
1998 1998
1999 1999 dp = depth[p];
2000 2000 nsp = sp = seen[p];
2001 2001 if (dp <= dv) {
2002 2002 depth[p] = dv + 1;
2003 2003 if (sp != sv) {
2004 2004 interesting[sv] += 1;
2005 2005 nsp = seen[p] = sv;
2006 2006 if (sp) {
2007 2007 interesting[sp] -= 1;
2008 2008 if (interesting[sp] == 0)
2009 2009 ninteresting -= 1;
2010 2010 }
2011 2011 }
2012 2012 }
2013 2013 else if (dv == dp - 1) {
2014 2014 nsp = sp | sv;
2015 2015 if (nsp == sp)
2016 2016 continue;
2017 2017 seen[p] = nsp;
2018 2018 interesting[sp] -= 1;
2019 2019 if (interesting[sp] == 0 && interesting[nsp] > 0)
2020 2020 ninteresting -= 1;
2021 2021 interesting[nsp] += 1;
2022 2022 }
2023 2023 }
2024 2024 interesting[sv] -= 1;
2025 2025 if (interesting[sv] == 0)
2026 2026 ninteresting -= 1;
2027 2027 }
2028 2028
2029 2029 final = 0;
2030 2030 j = ninteresting;
2031 2031 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2032 2032 if (interesting[i] == 0)
2033 2033 continue;
2034 2034 final |= i;
2035 2035 j -= 1;
2036 2036 }
2037 2037 if (final == 0) {
2038 2038 keys = PyList_New(0);
2039 2039 goto bail;
2040 2040 }
2041 2041
2042 2042 dict = PyDict_New();
2043 2043 if (dict == NULL)
2044 2044 goto bail;
2045 2045
2046 2046 for (i = 0; i < revcount; i++) {
2047 2047 PyObject *key;
2048 2048
2049 2049 if ((final & (1 << i)) == 0)
2050 2050 continue;
2051 2051
2052 2052 key = PyList_GET_ITEM(revs, i);
2053 2053 Py_INCREF(key);
2054 2054 Py_INCREF(Py_None);
2055 2055 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2056 2056 Py_DECREF(key);
2057 2057 Py_DECREF(Py_None);
2058 2058 goto bail;
2059 2059 }
2060 2060 }
2061 2061
2062 2062 keys = PyDict_Keys(dict);
2063 2063
2064 2064 bail:
2065 2065 free(depth);
2066 2066 free(seen);
2067 2067 free(interesting);
2068 2068 Py_XDECREF(dict);
2069 2069
2070 2070 return keys;
2071 2071 }
2072 2072
2073 2073 /*
2074 2074 * Given a (possibly overlapping) set of revs, return all the
2075 2075 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
2076 2076 */
2077 2077 static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
2078 2078 {
2079 2079 PyObject *ret = NULL;
2080 2080 Py_ssize_t argcount, i, len;
2081 2081 bitmask repeat = 0;
2082 2082 int revcount = 0;
2083 2083 int *revs;
2084 2084
2085 2085 argcount = PySequence_Length(args);
2086 2086 revs = malloc(argcount * sizeof(*revs));
2087 2087 if (argcount > 0 && revs == NULL)
2088 2088 return PyErr_NoMemory();
2089 2089 len = index_length(self) - 1;
2090 2090
2091 2091 for (i = 0; i < argcount; i++) {
2092 2092 static const int capacity = 24;
2093 2093 PyObject *obj = PySequence_GetItem(args, i);
2094 2094 bitmask x;
2095 2095 long val;
2096 2096
2097 2097 if (!PyInt_Check(obj)) {
2098 2098 PyErr_SetString(PyExc_TypeError,
2099 2099 "arguments must all be ints");
2100 2100 Py_DECREF(obj);
2101 2101 goto bail;
2102 2102 }
2103 2103 val = PyInt_AsLong(obj);
2104 2104 Py_DECREF(obj);
2105 2105 if (val == -1) {
2106 2106 ret = PyList_New(0);
2107 2107 goto done;
2108 2108 }
2109 2109 if (val < 0 || val >= len) {
2110 2110 PyErr_SetString(PyExc_IndexError,
2111 2111 "index out of range");
2112 2112 goto bail;
2113 2113 }
2114 2114 /* this cheesy bloom filter lets us avoid some more
2115 2115 * expensive duplicate checks in the common set-is-disjoint
2116 2116 * case */
2117 2117 x = 1ull << (val & 0x3f);
2118 2118 if (repeat & x) {
2119 2119 int k;
2120 2120 for (k = 0; k < revcount; k++) {
2121 2121 if (val == revs[k])
2122 2122 goto duplicate;
2123 2123 }
2124 2124 }
2125 2125 else repeat |= x;
2126 2126 if (revcount >= capacity) {
2127 2127 PyErr_Format(PyExc_OverflowError,
2128 2128 "bitset size (%d) > capacity (%d)",
2129 2129 revcount, capacity);
2130 2130 goto bail;
2131 2131 }
2132 2132 revs[revcount++] = (int)val;
2133 2133 duplicate:;
2134 2134 }
2135 2135
2136 2136 if (revcount == 0) {
2137 2137 ret = PyList_New(0);
2138 2138 goto done;
2139 2139 }
2140 2140 if (revcount == 1) {
2141 2141 PyObject *obj;
2142 2142 ret = PyList_New(1);
2143 2143 if (ret == NULL)
2144 2144 goto bail;
2145 2145 obj = PyInt_FromLong(revs[0]);
2146 2146 if (obj == NULL)
2147 2147 goto bail;
2148 2148 PyList_SET_ITEM(ret, 0, obj);
2149 2149 goto done;
2150 2150 }
2151 2151
2152 2152 ret = find_gca_candidates(self, revs, revcount);
2153 2153 if (ret == NULL)
2154 2154 goto bail;
2155 2155
2156 2156 done:
2157 2157 free(revs);
2158 2158 return ret;
2159 2159
2160 2160 bail:
2161 2161 free(revs);
2162 2162 Py_XDECREF(ret);
2163 2163 return NULL;
2164 2164 }
2165 2165
2166 2166 /*
2167 2167 * Given a (possibly overlapping) set of revs, return the greatest
2168 2168 * common ancestors: those with the longest path to the root.
2169 2169 */
2170 2170 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2171 2171 {
2172 2172 PyObject *ret;
2173 2173 PyObject *gca = index_commonancestorsheads(self, args);
2174 2174 if (gca == NULL)
2175 2175 return NULL;
2176 2176
2177 2177 if (PyList_GET_SIZE(gca) <= 1) {
2178 2178 return gca;
2179 2179 }
2180 2180
2181 2181 ret = find_deepest(self, gca);
2182 2182 Py_DECREF(gca);
2183 2183 return ret;
2184 2184 }
2185 2185
2186 2186 /*
2187 2187 * Invalidate any trie entries introduced by added revs.
2188 2188 */
2189 2189 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
2190 2190 {
2191 2191 Py_ssize_t i, len = PyList_GET_SIZE(self->added);
2192 2192
2193 2193 for (i = start; i < len; i++) {
2194 2194 PyObject *tuple = PyList_GET_ITEM(self->added, i);
2195 2195 PyObject *node = PyTuple_GET_ITEM(tuple, 7);
2196 2196
2197 2197 nt_insert(self, PyString_AS_STRING(node), -1);
2198 2198 }
2199 2199
2200 2200 if (start == 0)
2201 2201 Py_CLEAR(self->added);
2202 2202 }
2203 2203
2204 2204 /*
2205 2205 * Delete a numeric range of revs, which must be at the end of the
2206 2206 * range, but exclude the sentinel nullid entry.
2207 2207 */
2208 2208 static int index_slice_del(indexObject *self, PyObject *item)
2209 2209 {
2210 2210 Py_ssize_t start, stop, step, slicelength;
2211 2211 Py_ssize_t length = index_length(self);
2212 2212 int ret = 0;
2213 2213
2214 2214 if (PySlice_GetIndicesEx((PySliceObject*)item, length,
2215 2215 &start, &stop, &step, &slicelength) < 0)
2216 2216 return -1;
2217 2217
2218 2218 if (slicelength <= 0)
2219 2219 return 0;
2220 2220
2221 2221 if ((step < 0 && start < stop) || (step > 0 && start > stop))
2222 2222 stop = start;
2223 2223
2224 2224 if (step < 0) {
2225 2225 stop = start + 1;
2226 2226 start = stop + step*(slicelength - 1) - 1;
2227 2227 step = -step;
2228 2228 }
2229 2229
2230 2230 if (step != 1) {
2231 2231 PyErr_SetString(PyExc_ValueError,
2232 2232 "revlog index delete requires step size of 1");
2233 2233 return -1;
2234 2234 }
2235 2235
2236 2236 if (stop != length - 1) {
2237 2237 PyErr_SetString(PyExc_IndexError,
2238 2238 "revlog index deletion indices are invalid");
2239 2239 return -1;
2240 2240 }
2241 2241
2242 2242 if (start < self->length - 1) {
2243 2243 if (self->nt) {
2244 2244 Py_ssize_t i;
2245 2245
2246 2246 for (i = start + 1; i < self->length - 1; i++) {
2247 2247 const char *node = index_node(self, i);
2248 2248
2249 2249 if (node)
2250 2250 nt_insert(self, node, -1);
2251 2251 }
2252 2252 if (self->added)
2253 2253 nt_invalidate_added(self, 0);
2254 2254 if (self->ntrev > start)
2255 2255 self->ntrev = (int)start;
2256 2256 }
2257 2257 self->length = start + 1;
2258 2258 if (start < self->raw_length) {
2259 2259 if (self->cache) {
2260 2260 Py_ssize_t i;
2261 2261 for (i = start; i < self->raw_length; i++)
2262 2262 Py_CLEAR(self->cache[i]);
2263 2263 }
2264 2264 self->raw_length = start;
2265 2265 }
2266 2266 goto done;
2267 2267 }
2268 2268
2269 2269 if (self->nt) {
2270 2270 nt_invalidate_added(self, start - self->length + 1);
2271 2271 if (self->ntrev > start)
2272 2272 self->ntrev = (int)start;
2273 2273 }
2274 2274 if (self->added)
2275 2275 ret = PyList_SetSlice(self->added, start - self->length + 1,
2276 2276 PyList_GET_SIZE(self->added), NULL);
2277 2277 done:
2278 2278 Py_CLEAR(self->headrevs);
2279 2279 return ret;
2280 2280 }
2281 2281
2282 2282 /*
2283 2283 * Supported ops:
2284 2284 *
2285 2285 * slice deletion
2286 2286 * string assignment (extend node->rev mapping)
2287 2287 * string deletion (shrink node->rev mapping)
2288 2288 */
2289 2289 static int index_assign_subscript(indexObject *self, PyObject *item,
2290 2290 PyObject *value)
2291 2291 {
2292 2292 char *node;
2293 2293 Py_ssize_t nodelen;
2294 2294 long rev;
2295 2295
2296 2296 if (PySlice_Check(item) && value == NULL)
2297 2297 return index_slice_del(self, item);
2298 2298
2299 2299 if (node_check(item, &node, &nodelen) == -1)
2300 2300 return -1;
2301 2301
2302 2302 if (value == NULL)
2303 2303 return self->nt ? nt_insert(self, node, -1) : 0;
2304 2304 rev = PyInt_AsLong(value);
2305 2305 if (rev > INT_MAX || rev < 0) {
2306 2306 if (!PyErr_Occurred())
2307 2307 PyErr_SetString(PyExc_ValueError, "rev out of range");
2308 2308 return -1;
2309 2309 }
2310 2310
2311 2311 if (nt_init(self) == -1)
2312 2312 return -1;
2313 2313 return nt_insert(self, node, (int)rev);
2314 2314 }
2315 2315
2316 2316 /*
2317 2317 * Find all RevlogNG entries in an index that has inline data. Update
2318 2318 * the optional "offsets" table with those entries.
2319 2319 */
2320 2320 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
2321 2321 {
2322 2322 const char *data = PyString_AS_STRING(self->data);
2323 2323 Py_ssize_t pos = 0;
2324 2324 Py_ssize_t end = PyString_GET_SIZE(self->data);
2325 2325 long incr = v1_hdrsize;
2326 2326 Py_ssize_t len = 0;
2327 2327
2328 2328 while (pos + v1_hdrsize <= end && pos >= 0) {
2329 2329 uint32_t comp_len;
2330 2330 /* 3rd element of header is length of compressed inline data */
2331 2331 comp_len = getbe32(data + pos + 8);
2332 2332 incr = v1_hdrsize + comp_len;
2333 2333 if (offsets)
2334 2334 offsets[len] = data + pos;
2335 2335 len++;
2336 2336 pos += incr;
2337 2337 }
2338 2338
2339 2339 if (pos != end) {
2340 2340 if (!PyErr_Occurred())
2341 2341 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2342 2342 return -1;
2343 2343 }
2344 2344
2345 2345 return len;
2346 2346 }
2347 2347
2348 2348 static int index_init(indexObject *self, PyObject *args)
2349 2349 {
2350 2350 PyObject *data_obj, *inlined_obj;
2351 2351 Py_ssize_t size;
2352 2352
2353 2353 /* Initialize before argument-checking to avoid index_dealloc() crash. */
2354 2354 self->raw_length = 0;
2355 2355 self->added = NULL;
2356 2356 self->cache = NULL;
2357 2357 self->data = NULL;
2358 2358 self->headrevs = NULL;
2359 2359 self->filteredrevs = Py_None;
2360 2360 Py_INCREF(Py_None);
2361 2361 self->nt = NULL;
2362 2362 self->offsets = NULL;
2363 2363
2364 2364 if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
2365 2365 return -1;
2366 2366 if (!PyString_Check(data_obj)) {
2367 2367 PyErr_SetString(PyExc_TypeError, "data is not a string");
2368 2368 return -1;
2369 2369 }
2370 2370 size = PyString_GET_SIZE(data_obj);
2371 2371
2372 2372 self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
2373 2373 self->data = data_obj;
2374 2374
2375 2375 self->ntlength = self->ntcapacity = 0;
2376 2376 self->ntdepth = self->ntsplits = 0;
2377 2377 self->ntlookups = self->ntmisses = 0;
2378 2378 self->ntrev = -1;
2379 2379 Py_INCREF(self->data);
2380 2380
2381 2381 if (self->inlined) {
2382 2382 Py_ssize_t len = inline_scan(self, NULL);
2383 2383 if (len == -1)
2384 2384 goto bail;
2385 2385 self->raw_length = len;
2386 2386 self->length = len + 1;
2387 2387 } else {
2388 2388 if (size % v1_hdrsize) {
2389 2389 PyErr_SetString(PyExc_ValueError, "corrupt index file");
2390 2390 goto bail;
2391 2391 }
2392 2392 self->raw_length = size / v1_hdrsize;
2393 2393 self->length = self->raw_length + 1;
2394 2394 }
2395 2395
2396 2396 return 0;
2397 2397 bail:
2398 2398 return -1;
2399 2399 }
2400 2400
2401 2401 static PyObject *index_nodemap(indexObject *self)
2402 2402 {
2403 2403 Py_INCREF(self);
2404 2404 return (PyObject *)self;
2405 2405 }
2406 2406
2407 2407 static void index_dealloc(indexObject *self)
2408 2408 {
2409 2409 _index_clearcaches(self);
2410 2410 Py_XDECREF(self->filteredrevs);
2411 2411 Py_XDECREF(self->data);
2412 2412 Py_XDECREF(self->added);
2413 2413 PyObject_Del(self);
2414 2414 }
2415 2415
2416 2416 static PySequenceMethods index_sequence_methods = {
2417 2417 (lenfunc)index_length, /* sq_length */
2418 2418 0, /* sq_concat */
2419 2419 0, /* sq_repeat */
2420 2420 (ssizeargfunc)index_get, /* sq_item */
2421 2421 0, /* sq_slice */
2422 2422 0, /* sq_ass_item */
2423 2423 0, /* sq_ass_slice */
2424 2424 (objobjproc)index_contains, /* sq_contains */
2425 2425 };
2426 2426
2427 2427 static PyMappingMethods index_mapping_methods = {
2428 2428 (lenfunc)index_length, /* mp_length */
2429 2429 (binaryfunc)index_getitem, /* mp_subscript */
2430 2430 (objobjargproc)index_assign_subscript, /* mp_ass_subscript */
2431 2431 };
2432 2432
2433 2433 static PyMethodDef index_methods[] = {
2434 2434 {"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
2435 2435 "return the gca set of the given revs"},
2436 2436 {"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
2437 2437 METH_VARARGS,
2438 2438 "return the heads of the common ancestors of the given revs"},
2439 2439 {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
2440 2440 "clear the index caches"},
2441 2441 {"get", (PyCFunction)index_m_get, METH_VARARGS,
2442 2442 "get an index entry"},
2443 2443 {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
2444 2444 METH_VARARGS, "compute phases"},
2445 2445 {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
2446 2446 "reachableroots"},
2447 2447 {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
2448 2448 "get head revisions"}, /* Can do filtering since 3.2 */
2449 2449 {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
2450 2450 "get filtered head revisions"}, /* Can always do filtering */
2451 2451 {"insert", (PyCFunction)index_insert, METH_VARARGS,
2452 2452 "insert an index entry"},
2453 2453 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
2454 2454 "match a potentially ambiguous node ID"},
2455 2455 {"stats", (PyCFunction)index_stats, METH_NOARGS,
2456 2456 "stats for the index"},
2457 2457 {NULL} /* Sentinel */
2458 2458 };
2459 2459
2460 2460 static PyGetSetDef index_getset[] = {
2461 2461 {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
2462 2462 {NULL} /* Sentinel */
2463 2463 };
2464 2464
2465 2465 static PyTypeObject indexType = {
2466 2466 PyObject_HEAD_INIT(NULL)
2467 2467 0, /* ob_size */
2468 2468 "parsers.index", /* tp_name */
2469 2469 sizeof(indexObject), /* tp_basicsize */
2470 2470 0, /* tp_itemsize */
2471 2471 (destructor)index_dealloc, /* tp_dealloc */
2472 2472 0, /* tp_print */
2473 2473 0, /* tp_getattr */
2474 2474 0, /* tp_setattr */
2475 2475 0, /* tp_compare */
2476 2476 0, /* tp_repr */
2477 2477 0, /* tp_as_number */
2478 2478 &index_sequence_methods, /* tp_as_sequence */
2479 2479 &index_mapping_methods, /* tp_as_mapping */
2480 2480 0, /* tp_hash */
2481 2481 0, /* tp_call */
2482 2482 0, /* tp_str */
2483 2483 0, /* tp_getattro */
2484 2484 0, /* tp_setattro */
2485 2485 0, /* tp_as_buffer */
2486 2486 Py_TPFLAGS_DEFAULT, /* tp_flags */
2487 2487 "revlog index", /* tp_doc */
2488 2488 0, /* tp_traverse */
2489 2489 0, /* tp_clear */
2490 2490 0, /* tp_richcompare */
2491 2491 0, /* tp_weaklistoffset */
2492 2492 0, /* tp_iter */
2493 2493 0, /* tp_iternext */
2494 2494 index_methods, /* tp_methods */
2495 2495 0, /* tp_members */
2496 2496 index_getset, /* tp_getset */
2497 2497 0, /* tp_base */
2498 2498 0, /* tp_dict */
2499 2499 0, /* tp_descr_get */
2500 2500 0, /* tp_descr_set */
2501 2501 0, /* tp_dictoffset */
2502 2502 (initproc)index_init, /* tp_init */
2503 2503 0, /* tp_alloc */
2504 2504 };
2505 2505
2506 2506 /*
2507 2507 * returns a tuple of the form (index, index, cache) with elements as
2508 2508 * follows:
2509 2509 *
2510 2510 * index: an index object that lazily parses RevlogNG records
2511 2511 * cache: if data is inlined, a tuple (index_file_content, 0), else None
2512 2512 *
2513 2513 * added complications are for backwards compatibility
2514 2514 */
2515 2515 static PyObject *parse_index2(PyObject *self, PyObject *args)
2516 2516 {
2517 2517 PyObject *tuple = NULL, *cache = NULL;
2518 2518 indexObject *idx;
2519 2519 int ret;
2520 2520
2521 2521 idx = PyObject_New(indexObject, &indexType);
2522 2522 if (idx == NULL)
2523 2523 goto bail;
2524 2524
2525 2525 ret = index_init(idx, args);
2526 2526 if (ret == -1)
2527 2527 goto bail;
2528 2528
2529 2529 if (idx->inlined) {
2530 2530 cache = Py_BuildValue("iO", 0, idx->data);
2531 2531 if (cache == NULL)
2532 2532 goto bail;
2533 2533 } else {
2534 2534 cache = Py_None;
2535 2535 Py_INCREF(cache);
2536 2536 }
2537 2537
2538 2538 tuple = Py_BuildValue("NN", idx, cache);
2539 2539 if (!tuple)
2540 2540 goto bail;
2541 2541 return tuple;
2542 2542
2543 2543 bail:
2544 2544 Py_XDECREF(idx);
2545 2545 Py_XDECREF(cache);
2546 2546 Py_XDECREF(tuple);
2547 2547 return NULL;
2548 2548 }
2549 2549
2550 2550 #define BUMPED_FIX 1
2551 2551 #define USING_SHA_256 2
2552 2552 #define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)
2553 2553
2554 2554 static PyObject *readshas(
2555 2555 const char *source, unsigned char num, Py_ssize_t hashwidth)
2556 2556 {
2557 2557 int i;
2558 2558 PyObject *list = PyTuple_New(num);
2559 2559 if (list == NULL) {
2560 2560 return NULL;
2561 2561 }
2562 2562 for (i = 0; i < num; i++) {
2563 2563 PyObject *hash = PyString_FromStringAndSize(source, hashwidth);
2564 2564 if (hash == NULL) {
2565 2565 Py_DECREF(list);
2566 2566 return NULL;
2567 2567 }
2568 2568 PyTuple_SET_ITEM(list, i, hash);
2569 2569 source += hashwidth;
2570 2570 }
2571 2571 return list;
2572 2572 }
2573 2573
2574 2574 static PyObject *fm1readmarker(const char *databegin, const char *dataend,
2575 2575 uint32_t *msize)
2576 2576 {
2577 2577 const char *data = databegin;
2578 2578 const char *meta;
2579 2579
2580 2580 double mtime;
2581 2581 int16_t tz;
2582 2582 uint16_t flags;
2583 2583 unsigned char nsuccs, nparents, nmetadata;
2584 2584 Py_ssize_t hashwidth = 20;
2585 2585
2586 2586 PyObject *prec = NULL, *parents = NULL, *succs = NULL;
2587 2587 PyObject *metadata = NULL, *ret = NULL;
2588 2588 int i;
2589 2589
2590 2590 if (data + FM1_HEADER_SIZE > dataend) {
2591 2591 goto overflow;
2592 2592 }
2593 2593
2594 2594 *msize = getbe32(data);
2595 2595 data += 4;
2596 2596 mtime = getbefloat64(data);
2597 2597 data += 8;
2598 2598 tz = getbeint16(data);
2599 2599 data += 2;
2600 2600 flags = getbeuint16(data);
2601 2601 data += 2;
2602 2602
2603 2603 if (flags & USING_SHA_256) {
2604 2604 hashwidth = 32;
2605 2605 }
2606 2606
2607 2607 nsuccs = (unsigned char)(*data++);
2608 2608 nparents = (unsigned char)(*data++);
2609 2609 nmetadata = (unsigned char)(*data++);
2610 2610
2611 2611 if (databegin + *msize > dataend) {
2612 2612 goto overflow;
2613 2613 }
2614 2614 dataend = databegin + *msize; /* narrow down to marker size */
2615 2615
2616 2616 if (data + hashwidth > dataend) {
2617 2617 goto overflow;
2618 2618 }
2619 2619 prec = PyString_FromStringAndSize(data, hashwidth);
2620 2620 data += hashwidth;
2621 2621 if (prec == NULL) {
2622 2622 goto bail;
2623 2623 }
2624 2624
2625 2625 if (data + nsuccs * hashwidth > dataend) {
2626 2626 goto overflow;
2627 2627 }
2628 2628 succs = readshas(data, nsuccs, hashwidth);
2629 2629 if (succs == NULL) {
2630 2630 goto bail;
2631 2631 }
2632 2632 data += nsuccs * hashwidth;
2633 2633
2634 2634 if (nparents == 1 || nparents == 2) {
2635 2635 if (data + nparents * hashwidth > dataend) {
2636 2636 goto overflow;
2637 2637 }
2638 2638 parents = readshas(data, nparents, hashwidth);
2639 2639 if (parents == NULL) {
2640 2640 goto bail;
2641 2641 }
2642 2642 data += nparents * hashwidth;
2643 2643 } else {
2644 2644 parents = Py_None;
2645 2645 }
2646 2646
2647 2647 if (data + 2 * nmetadata > dataend) {
2648 2648 goto overflow;
2649 2649 }
2650 2650 meta = data + (2 * nmetadata);
2651 2651 metadata = PyTuple_New(nmetadata);
2652 2652 if (metadata == NULL) {
2653 2653 goto bail;
2654 2654 }
2655 2655 for (i = 0; i < nmetadata; i++) {
2656 2656 PyObject *tmp, *left = NULL, *right = NULL;
2657 2657 Py_ssize_t leftsize = (unsigned char)(*data++);
2658 2658 Py_ssize_t rightsize = (unsigned char)(*data++);
2659 2659 if (meta + leftsize + rightsize > dataend) {
2660 2660 goto overflow;
2661 2661 }
2662 2662 left = PyString_FromStringAndSize(meta, leftsize);
2663 2663 meta += leftsize;
2664 2664 right = PyString_FromStringAndSize(meta, rightsize);
2665 2665 meta += rightsize;
2666 2666 tmp = PyTuple_New(2);
2667 2667 if (!left || !right || !tmp) {
2668 2668 Py_XDECREF(left);
2669 2669 Py_XDECREF(right);
2670 2670 Py_XDECREF(tmp);
2671 2671 goto bail;
2672 2672 }
2673 2673 PyTuple_SET_ITEM(tmp, 0, left);
2674 2674 PyTuple_SET_ITEM(tmp, 1, right);
2675 2675 PyTuple_SET_ITEM(metadata, i, tmp);
2676 2676 }
2677 2677 ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags,
2678 2678 metadata, mtime, (int)tz * 60, parents);
2679 2679 goto bail; /* return successfully */
2680 2680
2681 2681 overflow:
2682 2682 PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
2683 2683 bail:
2684 2684 Py_XDECREF(prec);
2685 2685 Py_XDECREF(succs);
2686 2686 Py_XDECREF(metadata);
2687 2687 if (parents != Py_None)
2688 2688 Py_XDECREF(parents);
2689 2689 return ret;
2690 2690 }
2691 2691
2692 2692
2693 2693 static PyObject *fm1readmarkers(PyObject *self, PyObject *args) {
2694 2694 const char *data, *dataend;
2695 2695 Py_ssize_t datalen;
2696 2696 Py_ssize_t offset, stop;
2697 2697 PyObject *markers = NULL;
2698 2698
2699 2699 if (!PyArg_ParseTuple(args, "s#nn", &data, &datalen, &offset, &stop)) {
2700 2700 return NULL;
2701 2701 }
2702 2702 dataend = data + datalen;
2703 2703 data += offset;
2704 2704 markers = PyList_New(0);
2705 2705 if (!markers) {
2706 2706 return NULL;
2707 2707 }
2708 2708 while (offset < stop) {
2709 2709 uint32_t msize;
2710 2710 int error;
2711 2711 PyObject *record = fm1readmarker(data, dataend, &msize);
2712 2712 if (!record) {
2713 2713 goto bail;
2714 2714 }
2715 2715 error = PyList_Append(markers, record);
2716 2716 Py_DECREF(record);
2717 2717 if (error) {
2718 2718 goto bail;
2719 2719 }
2720 2720 data += msize;
2721 2721 offset += msize;
2722 2722 }
2723 2723 return markers;
2724 2724 bail:
2725 2725 Py_DECREF(markers);
2726 2726 return NULL;
2727 2727 }
2728 2728
2729 2729 static char parsers_doc[] = "Efficient content parsing.";
2730 2730
2731 2731 PyObject *encodedir(PyObject *self, PyObject *args);
2732 2732 PyObject *pathencode(PyObject *self, PyObject *args);
2733 2733 PyObject *lowerencode(PyObject *self, PyObject *args);
2734 2734
2735 2735 static PyMethodDef methods[] = {
2736 2736 {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
2737 2737 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
2738 2738 {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
2739 2739 {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
2740 2740 {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
2741 2741 {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
2742 2742 {"dict_new_presized", dict_new_presized, METH_VARARGS,
2743 2743 "construct a dict with an expected size\n"},
2744 2744 {"make_file_foldmap", make_file_foldmap, METH_VARARGS,
2745 2745 "make file foldmap\n"},
2746 2746 {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
2747 2747 {"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
2748 2748 {"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
2749 2749 {"fm1readmarkers", fm1readmarkers, METH_VARARGS,
2750 2750 "parse v1 obsolete markers\n"},
2751 2751 {NULL, NULL}
2752 2752 };
2753 2753
2754 2754 void dirs_module_init(PyObject *mod);
2755 2755 void manifest_module_init(PyObject *mod);
2756 2756
2757 2757 static void module_init(PyObject *mod)
2758 2758 {
2759 2759 /* This module constant has two purposes. First, it lets us unit test
2760 2760 * the ImportError raised without hard-coding any error text. This
2761 2761 * means we can change the text in the future without breaking tests,
2762 2762 * even across changesets without a recompile. Second, its presence
2763 2763 * can be used to determine whether the version-checking logic is
2764 2764 * present, which also helps in testing across changesets without a
2765 2765 * recompile. Note that this means the pure-Python version of parsers
2766 2766 * should not have this module constant. */
2767 2767 PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);
2768 2768
2769 2769 dirs_module_init(mod);
2770 2770 manifest_module_init(mod);
2771 2771
2772 2772 indexType.tp_new = PyType_GenericNew;
2773 2773 if (PyType_Ready(&indexType) < 0 ||
2774 2774 PyType_Ready(&dirstateTupleType) < 0)
2775 2775 return;
2776 2776 Py_INCREF(&indexType);
2777 2777 PyModule_AddObject(mod, "index", (PyObject *)&indexType);
2778 2778 Py_INCREF(&dirstateTupleType);
2779 2779 PyModule_AddObject(mod, "dirstatetuple",
2780 2780 (PyObject *)&dirstateTupleType);
2781 2781
2782 2782 nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
2783 2783 -1, -1, -1, -1, nullid, 20);
2784 2784 if (nullentry)
2785 2785 PyObject_GC_UnTrack(nullentry);
2786 2786 }
2787 2787
2788 2788 static int check_python_version(void)
2789 2789 {
2790 2790 PyObject *sys = PyImport_ImportModule("sys"), *ver;
2791 2791 long hexversion;
2792 2792 if (!sys)
2793 2793 return -1;
2794 2794 ver = PyObject_GetAttrString(sys, "hexversion");
2795 2795 Py_DECREF(sys);
2796 2796 if (!ver)
2797 2797 return -1;
2798 2798 hexversion = PyInt_AsLong(ver);
2799 2799 Py_DECREF(ver);
2800 2800 /* sys.hexversion is a 32-bit number by default, so the -1 case
2801 2801 * should only occur in unusual circumstances (e.g. if sys.hexversion
2802 2802 * is manually set to an invalid value). */
2803 2803 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2804 2804 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2805 2805 "modules were compiled with Python " PY_VERSION ", but "
2806 2806 "Mercurial is currently using Python with sys.hexversion=%ld: "
2807 2807 "Python %s\n at: %s", versionerrortext, hexversion,
2808 2808 Py_GetVersion(), Py_GetProgramFullPath());
2809 2809 return -1;
2810 2810 }
2811 2811 return 0;
2812 2812 }
2813 2813
2814 2814 #ifdef IS_PY3K
2815 2815 static struct PyModuleDef parsers_module = {
2816 2816 PyModuleDef_HEAD_INIT,
2817 2817 "parsers",
2818 2818 parsers_doc,
2819 2819 -1,
2820 2820 methods
2821 2821 };
2822 2822
2823 2823 PyMODINIT_FUNC PyInit_parsers(void)
2824 2824 {
2825 2825 PyObject *mod;
2826 2826
2827 2827 if (check_python_version() == -1)
2828 2828 return;
2829 2829 mod = PyModule_Create(&parsers_module);
2830 2830 module_init(mod);
2831 2831 return mod;
2832 2832 }
2833 2833 #else
2834 2834 PyMODINIT_FUNC initparsers(void)
2835 2835 {
2836 2836 PyObject *mod;
2837 2837
2838 2838 if (check_python_version() == -1)
2839 2839 return;
2840 2840 mod = Py_InitModule3("parsers", methods, parsers_doc);
2841 2841 module_init(mod);
2842 2842 }
2843 2843 #endif
@@ -1,58 +1,57 b''
1 1 # extension to emulate invoking 'dirstate.write()' at the time
2 2 # specified by '[fakedirstatewritetime] fakenow', only when
3 3 # 'dirstate.write()' is invoked via functions below:
4 4 #
5 5 # - 'workingctx._checklookup()' (= 'repo.status()')
6 6 # - 'committablectx.markcommitted()'
7 7
8 8 from mercurial import context, extensions, parsers, util
9 9
10 10 def pack_dirstate(fakenow, orig, dmap, copymap, pl, now):
11 11 # execute what original parsers.pack_dirstate should do actually
12 12 # for consistency
13 13 actualnow = int(now)
14 14 for f, e in dmap.iteritems():
15 15 if e[0] == 'n' and e[3] == actualnow:
16 16 e = parsers.dirstatetuple(e[0], e[1], e[2], -1)
17 17 dmap[f] = e
18 18
19 19 return orig(dmap, copymap, pl, fakenow)
20 20
21 21 def fakewrite(ui, func):
22 22 # fake "now" of 'pack_dirstate' only if it is invoked while 'func'
23 23
24 24 fakenow = ui.config('fakedirstatewritetime', 'fakenow')
25 25 if not fakenow:
26 26 # Execute original one, if fakenow isn't configured. This is
27 27 # useful to prevent subrepos from executing replaced one,
28 28 # because replacing 'parsers.pack_dirstate' is also effective
29 29 # in subrepos.
30 30 return func()
31 31
32 32 # parsing 'fakenow' in YYYYmmddHHMM format makes comparison between
33 33 # 'fakenow' value and 'touch -t YYYYmmddHHMM' argument easy
34 timestamp = util.parsedate(fakenow, ['%Y%m%d%H%M'])[0]
35 fakenow = float(timestamp)
34 fakenow = util.parsedate(fakenow, ['%Y%m%d%H%M'])[0]
36 35
37 36 orig_pack_dirstate = parsers.pack_dirstate
38 37 wrapper = lambda *args: pack_dirstate(fakenow, orig_pack_dirstate, *args)
39 38
40 39 parsers.pack_dirstate = wrapper
41 40 try:
42 41 return func()
43 42 finally:
44 43 parsers.pack_dirstate = orig_pack_dirstate
45 44
46 45 def _checklookup(orig, workingctx, files):
47 46 ui = workingctx.repo().ui
48 47 return fakewrite(ui, lambda : orig(workingctx, files))
49 48
50 49 def markcommitted(orig, committablectx, node):
51 50 ui = committablectx.repo().ui
52 51 return fakewrite(ui, lambda : orig(committablectx, node))
53 52
54 53 def extsetup(ui):
55 54 extensions.wrapfunction(context.workingctx, '_checklookup',
56 55 _checklookup)
57 56 extensions.wrapfunction(context.committablectx, 'markcommitted',
58 57 markcommitted)
General Comments 0
You need to be logged in to leave comments. Login now