##// END OF EJS Templates
dirstate: track otherparent files same as nonnormal...
Durham Goode -
r31278:1c97a91a default
parent child Browse files
Show More
@@ -1,1263 +1,1287 b''
1 1 # dirstate.py - working directory tracking for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import errno
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from .node import nullid
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 match as matchmod,
21 21 osutil,
22 22 parsers,
23 23 pathutil,
24 24 pycompat,
25 25 scmutil,
26 26 txnutil,
27 27 util,
28 28 )
29 29
30 30 propertycache = util.propertycache
31 31 filecache = scmutil.filecache
32 32 _rangemask = 0x7fffffff
33 33
34 34 dirstatetuple = parsers.dirstatetuple
35 35
class repocache(filecache):
    """filecache for files in .hg/"""
    def join(self, obj, fname):
        # Resolve fname against the repo's .hg opener (obj is the dirstate).
        return obj._opener.join(fname)
40 40
class rootcache(filecache):
    """filecache for files in the repository root"""
    def join(self, obj, fname):
        # Resolve fname against the working-directory root (obj is the
        # dirstate).
        return obj._join(fname)
45 45
46 46 def _getfsnow(vfs):
47 47 '''Get "now" timestamp on filesystem'''
48 48 tmpfd, tmpname = vfs.mkstemp()
49 49 try:
50 50 return os.fstat(tmpfd).st_mtime
51 51 finally:
52 52 os.close(tmpfd)
53 53 vfs.unlink(tmpname)
54 54
def nonnormalentries(dmap):
    '''Compute the nonnormal dirstate entries from the dmap'''
    # Returns a pair of sets: (nonnormal files, otherparent files).
    try:
        # Fast path: C implementation, when the parsers module provides it.
        return parsers.nonnormalotherparententries(dmap)
    except AttributeError:
        # Pure-Python fallback. Entries are (state, mode, size, mtime).
        nonnorm = set(fname for fname, e in dmap.iteritems()
                      if e[0] != 'n' or e[3] == -1)
        otherparent = set(fname for fname, e in dmap.iteritems()
                          if e[0] == 'n' and e[2] == -2)
        return nonnorm, otherparent
62 68
class dirstate(object):
    # Tracks the state of the working directory: per-file status letters,
    # copy records, the working parents, and several derived caches.

    def __init__(self, opener, ui, root, validate):
        '''Create a new dirstate object.

        opener is an open()-like callable that can be used to open the
        dirstate file; root is the root of the directory tracked by
        the dirstate.
        '''
        self._opener = opener
        # callback used by parents()/p1()/p2() to validate node ids
        self._validate = validate
        self._root = root
        # ntpath.join(root, '') of Python 2.7.9 does not add sep if root is
        # UNC path pointing to root share (issue4557)
        self._rootdir = pathutil.normasprefix(root)
        # internal config: ui.forcecwd
        forcecwd = ui.config('ui', 'forcecwd')
        if forcecwd:
            self._cwd = forcecwd
        # True when in-memory state differs from what is on disk
        self._dirty = False
        # True when the parents were changed but not yet written out
        self._dirtypl = False
        self._lastnormaltime = 0
        self._ui = ui
        self._filecache = {}
        # nesting depth of beginparentchange()/endparentchange() pairs
        self._parentwriters = 0
        self._filename = 'dirstate'
        self._pendingfilename = '%s.pending' % self._filename
        self._plchangecallbacks = {}
        # parents before the current change, reported to change callbacks
        self._origpl = None
        # files touched since the last write, used to drop ambiguous mtimes
        self._updatedfiles = set()

        # for consistent view between _pl() and _read() invocations
        self._pendingmode = None
96 102
    def beginparentchange(self):
        '''Marks the beginning of a set of changes that involve changing
        the dirstate parents. If there is an exception during this time,
        the dirstate will not be written when the wlock is released. This
        prevents writing an incoherent dirstate where the parent doesn't
        match the contents.
        '''
        # calls may nest; pendingparentchange() is true while the count > 0
        self._parentwriters += 1
105 111
    def endparentchange(self):
        '''Marks the end of a set of changes that involve changing the
        dirstate parents. Once all parent changes have been marked done,
        the wlock will be free to write the dirstate on release.
        '''
        # guard against unbalanced end calls rather than going negative
        if self._parentwriters > 0:
            self._parentwriters -= 1
113 119
    def pendingparentchange(self):
        '''Returns true if the dirstate is in the middle of a set of changes
        that modify the dirstate parent.
        '''
        return self._parentwriters > 0
119 125
    @propertycache
    def _map(self):
        '''Return the dirstate contents as a map from filename to
        (state, mode, size, time).'''
        # _read() assigns self._map directly, replacing this propertycache
        self._read()
        return self._map
126 132
    @propertycache
    def _copymap(self):
        # map of destination filename -> copy source; filled by _read()
        self._read()
        return self._copymap
131 137
    @propertycache
    def _nonnormalset(self):
        # nonnormalentries() computes both sets in one pass, so seed the
        # sibling _otherparentset cache at the same time
        nonnorm, otherparents = nonnormalentries(self._map)
        self._otherparentset = otherparents
        return nonnorm
143
    @propertycache
    def _otherparentset(self):
        # mirror of _nonnormalset: compute both sets and seed the sibling
        nonnorm, otherparents = nonnormalentries(self._map)
        self._nonnormalset = nonnorm
        return otherparents
135 149
136 150 @propertycache
137 151 def _filefoldmap(self):
138 152 try:
139 153 makefilefoldmap = parsers.make_file_foldmap
140 154 except AttributeError:
141 155 pass
142 156 else:
143 157 return makefilefoldmap(self._map, util.normcasespec,
144 158 util.normcasefallback)
145 159
146 160 f = {}
147 161 normcase = util.normcase
148 162 for name, s in self._map.iteritems():
149 163 if s[0] != 'r':
150 164 f[normcase(name)] = name
151 165 f['.'] = '.' # prevents useless util.fspath() invocation
152 166 return f
153 167
154 168 @propertycache
155 169 def _dirfoldmap(self):
156 170 f = {}
157 171 normcase = util.normcase
158 172 for name in self._dirs:
159 173 f[normcase(name)] = name
160 174 return f
161 175
    @repocache('branch')
    def _branch(self):
        # Read the current branch from .hg/branch; an absent file means the
        # default branch.
        try:
            return self._opener.read("branch").strip() or "default"
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return "default"
170 184
    @propertycache
    def _pl(self):
        # Read the two 20-byte parent nodes from the start of the dirstate
        # file; a missing file means both parents are null.
        try:
            fp = self._opendirstatefile()
            st = fp.read(40)
            fp.close()
            l = len(st)
            if l == 40:
                return st[:20], st[20:40]
            elif l > 0 and l < 40:
                # a short read means the file was truncated mid-write
                raise error.Abort(_('working directory state appears damaged!'))
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
        return [nullid, nullid]
186 200
    @propertycache
    def _dirs(self):
        # multiset of directories containing tracked files ('r' entries
        # are skipped by util.dirs)
        return util.dirs(self._map, 'r')
190 204
    def dirs(self):
        """Return the directory multiset for the tracked files."""
        return self._dirs
193 207
    @rootcache('.hgignore')
    def _ignore(self):
        # Build the combined ignore matcher from all configured ignore files;
        # with no ignore files nothing is ever ignored.
        files = self._ignorefiles()
        if not files:
            return util.never

        pats = ['include:%s' % f for f in files]
        return matchmod.match(self._root, '', [], pats, warn=self._ui.warn)
202 216
    @propertycache
    def _slash(self):
        # True when paths should be displayed with '/' despite a different
        # native separator (ui.slash option)
        return self._ui.configbool('ui', 'slash') and pycompat.ossep != '/'
206 220
    @propertycache
    def _checklink(self):
        # does the filesystem under the repo root support symlinks?
        return util.checklink(self._root)
210 224
    @propertycache
    def _checkexec(self):
        # does the filesystem under the repo root support the exec bit?
        return util.checkexec(self._root)
214 228
    @propertycache
    def _checkcase(self):
        # True on case-insensitive (folding) filesystems
        return not util.fscasesensitive(self._join('.hg'))
218 232
    def _join(self, f):
        """Return the absolute path of repo-relative path f."""
        # much faster than os.path.join()
        # it's safe because f is always a relative path
        return self._rootdir + f
223 237
    def flagfunc(self, buildfallback):
        """Return a function mapping a filename to its flags ('l', 'x' or '').

        When the filesystem cannot express symlinks and/or the exec bit,
        the missing information is taken from the fallback built by
        buildfallback() (typically manifest flags).
        """
        if self._checklink and self._checkexec:
            # filesystem is fully capable: read flags straight from disk
            def f(x):
                try:
                    st = os.lstat(self._join(x))
                    if util.statislink(st):
                        return 'l'
                    if util.statisexec(st):
                        return 'x'
                except OSError:
                    pass
                return ''
            return f

        fallback = buildfallback()
        if self._checklink:
            # symlinks from disk, exec bit from the fallback
            def f(x):
                if os.path.islink(self._join(x)):
                    return 'l'
                if 'x' in fallback(x):
                    return 'x'
                return ''
            return f
        if self._checkexec:
            # exec bit from disk, symlinks from the fallback
            def f(x):
                if 'l' in fallback(x):
                    return 'l'
                if util.isexec(self._join(x)):
                    return 'x'
                return ''
            return f
        else:
            # neither is supported: rely entirely on the fallback
            return fallback
257 271
    @propertycache
    def _cwd(self):
        # may have been pre-set from ui.forcecwd in __init__
        return pycompat.getcwd()
261 275
    def getcwd(self):
        '''Return the path from which a canonical path is calculated.

        This path should be used to resolve file patterns or to convert
        canonical paths back to file paths for display. It shouldn't be
        used to get real file paths. Use vfs functions instead.
        '''
        cwd = self._cwd
        if cwd == self._root:
            return ''
        # self._root ends with a path separator if self._root is '/' or 'C:\'
        rootsep = self._root
        if not util.endswithsep(rootsep):
            rootsep += pycompat.ossep
        if cwd.startswith(rootsep):
            # inside the repo: return the path relative to the root
            return cwd[len(rootsep):]
        else:
            # we're outside the repo. return an absolute path.
            return cwd
281 295
    def pathto(self, f, cwd=None):
        """Return repo-relative path f expressed relative to cwd, for
        display; honors the ui.slash setting."""
        if cwd is None:
            cwd = self.getcwd()
        path = util.pathto(self._root, cwd, f)
        if self._slash:
            return util.pconvert(path)
        return path
289 303
290 304 def __getitem__(self, key):
291 305 '''Return the current state of key (a filename) in the dirstate.
292 306
293 307 States are:
294 308 n normal
295 309 m needs merging
296 310 r marked for removal
297 311 a marked for addition
298 312 ? not tracked
299 313 '''
300 314 return self._map.get(key, ("?",))[0]
301 315
    def __contains__(self, key):
        """Return True if filename key has a dirstate entry."""
        return key in self._map
304 318
    def __iter__(self):
        # iterate tracked filenames in sorted order
        for x in sorted(self._map):
            yield x
308 322
    def iteritems(self):
        """Iterate (filename, (state, mode, size, time)) pairs."""
        return self._map.iteritems()
311 325
    def parents(self):
        """Return the two validated parent nodes of the working directory."""
        return [self._validate(p) for p in self._pl]
314 328
    def p1(self):
        """Return the validated first parent of the working directory."""
        return self._validate(self._pl[0])
317 331
    def p2(self):
        """Return the validated second parent of the working directory."""
        return self._validate(self._pl[1])
320 334
    def branch(self):
        """Return the current branch name in local encoding."""
        return encoding.tolocal(self._branch)
323 337
    def setparents(self, p1, p2=nullid):
        """Set dirstate parents to p1 and p2.

        When moving from two parents to one, 'm' merged entries a
        adjusted to normal and previous copy records discarded and
        returned by the call.

        See localrepo.setparents()
        """
        if self._parentwriters == 0:
            raise ValueError("cannot set dirstate parent without "
                             "calling dirstate.beginparentchange")

        self._dirty = self._dirtypl = True
        oldp2 = self._pl[1]
        if self._origpl is None:
            self._origpl = self._pl
        self._pl = p1, p2
        copies = {}
        if oldp2 != nullid and p2 == nullid:
            # Dropping the second parent: only nonnormal and otherparent
            # entries can carry merge state, so scanning their union is much
            # cheaper than walking the whole map.
            candidatefiles = self._nonnormalset.union(self._otherparentset)
            for f in candidatefiles:
                s = self._map.get(f)
                if s is None:
                    continue

                # Discard 'm' markers when moving away from a merge state
                if s[0] == 'm':
                    if f in self._copymap:
                        copies[f] = self._copymap[f]
                    self.normallookup(f)
                # Also fix up otherparent markers
                elif s[0] == 'n' and s[2] == -2:
                    if f in self._copymap:
                        copies[f] = self._copymap[f]
                    self.add(f)
        return copies
356 375
    def setbranch(self, branch):
        """Persist branch (local encoding) to .hg/branch atomically."""
        self._branch = encoding.fromlocal(branch)
        f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
        try:
            f.write(self._branch + '\n')
            f.close()

            # make sure filecache has the correct stat info for _branch after
            # replacing the underlying file
            ce = self._filecache['_branch']
            if ce:
                ce.refresh()
        except: # re-raises
            f.discard()
            raise
372 391
    def _opendirstatefile(self):
        """Open the dirstate file, honoring a pending transaction copy.

        Raises Abort if successive opens disagree about whether the
        pending file is in use, which would yield an inconsistent view.
        """
        fp, mode = txnutil.trypending(self._root, self._opener, self._filename)
        if self._pendingmode is not None and self._pendingmode != mode:
            fp.close()
            raise error.Abort(_('working directory state may be '
                                'changed parallelly'))
        self._pendingmode = mode
        return fp
381 400
    def _read(self):
        """Parse the on-disk dirstate into _map/_copymap (and _pl unless a
        parent change is already pending in memory)."""
        self._map = {}
        self._copymap = {}
        try:
            fp = self._opendirstatefile()
            try:
                st = fp.read()
            finally:
                fp.close()
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            # no dirstate file: leave the maps empty
            return
        if not st:
            return

        if util.safehasattr(parsers, 'dict_new_presized'):
            # Make an estimate of the number of files in the dirstate based on
            # its size. From a linear regression on a set of real-world repos,
            # all over 10,000 files, the size of a dirstate entry is 85
            # bytes. The cost of resizing is significantly higher than the cost
            # of filling in a larger presized dict, so subtract 20% from the
            # size.
            #
            # This heuristic is imperfect in many ways, so in a future dirstate
            # format update it makes sense to just record the number of entries
            # on write.
            self._map = parsers.dict_new_presized(len(st) / 71)

        # Python's garbage collector triggers a GC each time a certain number
        # of container objects (the number being defined by
        # gc.get_threshold()) are allocated. parse_dirstate creates a tuple
        # for each file in the dirstate. The C version then immediately marks
        # them as not to be tracked by the collector. However, this has no
        # effect on when GCs are triggered, only on what objects the GC looks
        # into. This means that O(number of files) GCs are unavoidable.
        # Depending on when in the process's lifetime the dirstate is parsed,
        # this can get very expensive. As a workaround, disable GC while
        # parsing the dirstate.
        #
        # (we cannot decorate the function directly since it is in a C module)
        parse_dirstate = util.nogc(parsers.parse_dirstate)
        p = parse_dirstate(self._map, self._copymap, st)
        if not self._dirtypl:
            self._pl = p
427 446
    def invalidate(self):
        """Discard all cached state so it is re-read from disk on next use."""
        for a in ("_map", "_copymap", "_filefoldmap", "_dirfoldmap", "_branch",
                  "_pl", "_dirs", "_ignore", "_nonnormalset",
                  "_otherparentset"):
            if a in self.__dict__:
                delattr(self, a)
        self._lastnormaltime = 0
        self._dirty = False
        self._updatedfiles.clear()
        self._parentwriters = 0
        self._origpl = None
438 458
439 459 def copy(self, source, dest):
440 460 """Mark dest as a copy of source. Unmark dest if source is None."""
441 461 if source == dest:
442 462 return
443 463 self._dirty = True
444 464 if source is not None:
445 465 self._copymap[dest] = source
446 466 self._updatedfiles.add(source)
447 467 self._updatedfiles.add(dest)
448 468 elif dest in self._copymap:
449 469 del self._copymap[dest]
450 470 self._updatedfiles.add(dest)
451 471
452 472 def copied(self, file):
453 473 return self._copymap.get(file, None)
454 474
    def copies(self):
        """Return the full dest -> source copy map."""
        return self._copymap
457 477
    def _droppath(self, f):
        """Update the directory and foldmap caches for a path being dropped,
        and record f as updated."""
        if self[f] not in "?r" and "_dirs" in self.__dict__:
            self._dirs.delpath(f)

        if "_filefoldmap" in self.__dict__:
            normed = util.normcase(f)
            if normed in self._filefoldmap:
                del self._filefoldmap[normed]

        self._updatedfiles.add(f)
468 488
    def _addpath(self, f, state, mode, size, mtime):
        """Insert or update the entry for f and maintain the derived caches
        (_dirs, _nonnormalset, _otherparentset)."""
        oldstate = self[f]
        if state == 'a' or oldstate == 'r':
            scmutil.checkfilename(f)
            if f in self._dirs:
                raise error.Abort(_('directory %r already in dirstate') % f)
            # shadows
            for d in util.finddirs(f):
                if d in self._dirs:
                    break
                if d in self._map and self[d] != 'r':
                    raise error.Abort(
                        _('file %r in dirstate clashes with %r') % (d, f))
        if oldstate in "?r" and "_dirs" in self.__dict__:
            self._dirs.addpath(f)
        self._dirty = True
        self._updatedfiles.add(f)
        self._map[f] = dirstatetuple(state, mode, size, mtime)
        if state != 'n' or mtime == -1:
            self._nonnormalset.add(f)
        # size == -2 marks a file coming from the other merge parent
        if size == -2:
            self._otherparentset.add(f)
489 511
    def normal(self, f):
        '''Mark a file normal and clean.'''
        s = os.lstat(self._join(f))
        mtime = s.st_mtime
        # mask size/mtime to 31 bits so they fit the dirstate format
        self._addpath(f, 'n', s.st_mode,
                      s.st_size & _rangemask, mtime & _rangemask)
        if f in self._copymap:
            del self._copymap[f]
        if f in self._nonnormalset:
            self._nonnormalset.remove(f)
        if mtime > self._lastnormaltime:
            # Remember the most recent modification timeslot for status(),
            # to make sure we won't miss future size-preserving file content
            # modifications that happen within the same timeslot.
            self._lastnormaltime = mtime
505 527
    def normallookup(self, f):
        '''Mark a file normal, but possibly dirty.'''
        if self._pl[1] != nullid and f in self._map:
            # if there is a merge going on and the file was either
            # in state 'm' (-1) or coming from other parent (-2) before
            # being removed, restore that state.
            entry = self._map[f]
            if entry[0] == 'r' and entry[2] in (-1, -2):
                source = self._copymap.get(f)
                if entry[2] == -1:
                    self.merge(f)
                elif entry[2] == -2:
                    self.otherparent(f)
                if source:
                    self.copy(source, f)
                return
            if entry[0] == 'm' or entry[0] == 'n' and entry[2] == -2:
                # already merged / from other parent: leave the state alone
                return
        # size -1 and mtime -1 force a content comparison on next status
        self._addpath(f, 'n', 0, -1, -1)
        if f in self._copymap:
            del self._copymap[f]
        if f in self._nonnormalset:
            self._nonnormalset.remove(f)
529 551
    def otherparent(self, f):
        '''Mark as coming from the other parent, always dirty.'''
        if self._pl[1] == nullid:
            raise error.Abort(_("setting %r to other parent "
                                "only allowed in merges") % f)
        if f in self and self[f] == 'n':
            # merge-like
            self._addpath(f, 'm', 0, -2, -1)
        else:
            # add-like
            self._addpath(f, 'n', 0, -2, -1)

        if f in self._copymap:
            del self._copymap[f]
544 566
    def add(self, f):
        '''Mark a file added.'''
        self._addpath(f, 'a', 0, -1, -1)
        if f in self._copymap:
            del self._copymap[f]
550 572
    def remove(self, f):
        '''Mark a file removed.'''
        self._dirty = True
        self._droppath(f)
        size = 0
        if self._pl[1] != nullid and f in self._map:
            # backup the previous state
            entry = self._map[f]
            if entry[0] == 'm': # merge
                size = -1
            elif entry[0] == 'n' and entry[2] == -2: # other parent
                size = -2
                # keep the otherparent cache in sync with the 'r' entry
                self._otherparentset.add(f)
        self._map[f] = dirstatetuple('r', 0, size, 0)
        self._nonnormalset.add(f)
        if size == 0 and f in self._copymap:
            del self._copymap[f]
567 590
    def merge(self, f):
        '''Mark a file merged.'''
        # outside of a merge, a "merged" file is just possibly-dirty normal
        if self._pl[1] == nullid:
            return self.normallookup(f)
        return self.otherparent(f)
573 596
574 597 def drop(self, f):
575 598 '''Drop a file from the dirstate'''
576 599 if f in self._map:
577 600 self._dirty = True
578 601 self._droppath(f)
579 602 del self._map[f]
580 603 if f in self._nonnormalset:
581 604 self._nonnormalset.remove(f)
582 605 if f in self._copymap:
583 606 del self._copymap[f]
584 607
    def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
        """Determine the filesystem's case for path and cache it in storemap.

        normed is the normcase'd form of path; exists may be precomputed
        (None means "check the filesystem").
        """
        if exists is None:
            exists = os.path.lexists(os.path.join(self._root, path))
        if not exists:
            # Maybe a path component exists
            if not ignoremissing and '/' in path:
                d, f = path.rsplit('/', 1)
                d = self._normalize(d, False, ignoremissing, None)
                folded = d + "/" + f
            else:
                # No path components, preserve original case
                folded = path
        else:
            # recursively normalize leading directory components
            # against dirstate
            if '/' in normed:
                d, f = normed.rsplit('/', 1)
                d = self._normalize(d, False, ignoremissing, True)
                r = self._root + "/" + d
                folded = d + "/" + util.fspath(f, r)
            else:
                folded = util.fspath(normed, self._root)
        storemap[normed] = folded

        return folded
610 633
    def _normalizefile(self, path, isknown, ignoremissing=False, exists=None):
        """Normalize the case of a file path using the file foldmap."""
        normed = util.normcase(path)
        folded = self._filefoldmap.get(normed, None)
        if folded is None:
            if isknown:
                # caller asserts the name is already correct
                folded = path
            else:
                folded = self._discoverpath(path, normed, ignoremissing, exists,
                                            self._filefoldmap)
        return folded
621 644
    def _normalize(self, path, isknown, ignoremissing=False, exists=None):
        """Normalize the case of a path, consulting the file foldmap first
        and then the directory foldmap."""
        normed = util.normcase(path)
        folded = self._filefoldmap.get(normed, None)
        if folded is None:
            folded = self._dirfoldmap.get(normed, None)
        if folded is None:
            if isknown:
                folded = path
            else:
                # store discovered result in dirfoldmap so that future
                # normalizefile calls don't start matching directories
                folded = self._discoverpath(path, normed, ignoremissing, exists,
                                            self._dirfoldmap)
        return folded
636 659
    def normalize(self, path, isknown=False, ignoremissing=False):
        '''
        normalize the case of a pathname when on a casefolding filesystem

        isknown specifies whether the filename came from walking the
        disk, to avoid extra filesystem access.

        If ignoremissing is True, missing path are returned
        unchanged. Otherwise, we try harder to normalize possibly
        existing path components.

        The normalized case is determined based on the following precedence:

        - version of name already stored in the dirstate
        - version of name stored on disk
        - version provided via command arguments
        '''

        if self._checkcase:
            return self._normalize(path, isknown, ignoremissing)
        # case-sensitive filesystem: nothing to do
        return path
658 681
    def clear(self):
        """Reset the dirstate to an empty state with null parents."""
        self._map = {}
        self._nonnormalset = set()
        self._otherparentset = set()
        if "_dirs" in self.__dict__:
            delattr(self, "_dirs")
        self._copymap = {}
        self._pl = [nullid, nullid]
        self._lastnormaltime = 0
        self._updatedfiles.clear()
        self._dirty = True
669 693
    def rebuild(self, parent, allfiles, changedfiles=None):
        """Rebuild dirstate entries against parent for changedfiles
        (or for everything when changedfiles is None)."""
        if changedfiles is None:
            # Rebuild entire dirstate
            changedfiles = allfiles
        # clear() resets _lastnormaltime; preserve it across the rebuild
        lastnormaltime = self._lastnormaltime
        self.clear()
        self._lastnormaltime = lastnormaltime

        if self._origpl is None:
            self._origpl = self._pl
        self._pl = (parent, nullid)
        for f in changedfiles:
            if f in allfiles:
                self.normallookup(f)
            else:
                self.drop(f)

        self._dirty = True
688 712
    def write(self, tr):
        """Write the dirstate to disk, or register a delayed write on the
        transaction tr when one is active."""
        if not self._dirty:
            return

        filename = self._filename
        if tr:
            # 'dirstate.write()' is not only for writing in-memory
            # changes out, but also for dropping ambiguous timestamp.
            # delayed writing re-raise "ambiguous timestamp issue".
            # See also the wiki page below for detail:
            # https://www.mercurial-scm.org/wiki/DirstateTransactionPlan

            # emulate dropping timestamp in 'parsers.pack_dirstate'
            now = _getfsnow(self._opener)
            dmap = self._map
            for f in self._updatedfiles:
                e = dmap.get(f)
                if e is not None and e[0] == 'n' and e[3] == now:
                    dmap[f] = dirstatetuple(e[0], e[1], e[2], -1)
                    self._nonnormalset.add(f)

            # emulate that all 'dirstate.normal' results are written out
            self._lastnormaltime = 0
            self._updatedfiles.clear()

            # delay writing in-memory changes out
            tr.addfilegenerator('dirstate', (self._filename,),
                                self._writedirstate, location='plain')
            return

        st = self._opener(filename, "w", atomictemp=True, checkambig=True)
        self._writedirstate(st)
721 745
    def addparentchangecallback(self, category, callback):
        """add a callback to be called when the wd parents are changed

        Callback will be called with the following arguments:
            dirstate, (oldp1, oldp2), (newp1, newp2)

        Category is a unique identifier to allow overwriting an old callback
        with a newer callback.
        """
        self._plchangecallbacks[category] = callback
732 756
    def _writedirstate(self, st):
        """Serialize the dirstate into the open file-like object st and
        fire any registered parent-change callbacks."""
        # notify callbacks about parents change
        if self._origpl is not None and self._origpl != self._pl:
            for c, callback in sorted(self._plchangecallbacks.iteritems()):
                callback(self, self._origpl, self._pl)
            self._origpl = None
        # use the modification time of the newly created temporary file as the
        # filesystem's notion of 'now'
        now = util.fstat(st).st_mtime & _rangemask

        # enough 'delaywrite' prevents 'pack_dirstate' from dropping
        # timestamp of each entries in dirstate, because of 'now > mtime'
        delaywrite = self._ui.configint('debug', 'dirstate.delaywrite', 0)
        if delaywrite > 0:
            # do we have any files to delay for?
            for f, e in self._map.iteritems():
                if e[0] == 'n' and e[3] == now:
                    import time # to avoid useless import
                    # rather than sleep n seconds, sleep until the next
                    # multiple of n seconds
                    clock = time.time()
                    start = int(clock) - (int(clock) % delaywrite)
                    end = start + delaywrite
                    time.sleep(end - clock)
                    now = end # trust our estimate that the end is near now
                    break

        st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
        # pack_dirstate may have mutated entries; recompute the caches
        self._nonnormalset, self._otherparentset = nonnormalentries(self._map)
        st.close()
        self._lastnormaltime = 0
        self._dirty = self._dirtypl = False
765 789
766 790 def _dirignore(self, f):
767 791 if f == '.':
768 792 return False
769 793 if self._ignore(f):
770 794 return True
771 795 for p in util.finddirs(f):
772 796 if self._ignore(p):
773 797 return True
774 798 return False
775 799
    def _ignorefiles(self):
        """Return the list of absolute paths of all configured ignore files."""
        files = []
        if os.path.exists(self._join('.hgignore')):
            files.append(self._join('.hgignore'))
        for name, path in self._ui.configitems("ui"):
            if name == 'ignore' or name.startswith('ignore.'):
                # we need to use os.path.join here rather than self._join
                # because path is arbitrary and user-specified
                files.append(os.path.join(self._rootdir, util.expandpath(path)))
        return files
786 810
    def _ignorefileandline(self, f):
        """Return (ignorefile, lineno, line) for the pattern that ignores f,
        or (None, -1, "") if f is not ignored; follows subincludes."""
        files = collections.deque(self._ignorefiles())
        visited = set()
        while files:
            i = files.popleft()
            patterns = matchmod.readpatternfile(i, self._ui.warn,
                                                sourceinfo=True)
            for pattern, lineno, line in patterns:
                kind, p = matchmod._patsplit(pattern, 'glob')
                if kind == "subinclude":
                    # queue the included file, once
                    if p not in visited:
                        files.append(p)
                    continue
                m = matchmod.match(self._root, '', [], [pattern],
                                   warn=self._ui.warn)
                if m(f):
                    return (i, lineno, line)
            visited.add(i)
        return (None, -1, "")
806 830
    def _walkexplicit(self, match, subrepos):
        '''Get stat data about the files explicitly specified by match.

        Return a triple (results, dirsfound, dirsnotfound).
        - results is a mapping from filename to stat result. It also contains
          listings mapping subrepos and .hg to None.
        - dirsfound is a list of files found to be directories.
        - dirsnotfound is a list of files that the dirstate thinks are
          directories and that were not found.'''

        def badtype(mode):
            # describe an unsupported file type for the bad-file callback
            kind = _('unknown')
            if stat.S_ISCHR(mode):
                kind = _('character device')
            elif stat.S_ISBLK(mode):
                kind = _('block device')
            elif stat.S_ISFIFO(mode):
                kind = _('fifo')
            elif stat.S_ISSOCK(mode):
                kind = _('socket')
            elif stat.S_ISDIR(mode):
                kind = _('directory')
            return _('unsupported file type (type is %s)') % kind

        matchedir = match.explicitdir
        badfn = match.bad
        dmap = self._map
        # bind frequently-used globals/attributes to locals for the loop
        lstat = os.lstat
        getkind = stat.S_IFMT
        dirkind = stat.S_IFDIR
        regkind = stat.S_IFREG
        lnkkind = stat.S_IFLNK
        join = self._join
        dirsfound = []
        foundadd = dirsfound.append
        dirsnotfound = []
        notfoundadd = dirsnotfound.append

        if not match.isexact() and self._checkcase:
            normalize = self._normalize
        else:
            normalize = None

        files = sorted(match.files())
        subrepos.sort()
        # drop any requested files that live inside a subrepo; both lists
        # are sorted so this is a single merge-style pass
        i, j = 0, 0
        while i < len(files) and j < len(subrepos):
            subpath = subrepos[j] + "/"
            if files[i] < subpath:
                i += 1
                continue
            while i < len(files) and files[i].startswith(subpath):
                del files[i]
            j += 1

        if not files or '.' in files:
            files = ['.']
        results = dict.fromkeys(subrepos)
        results['.hg'] = None

        alldirs = None
        for ff in files:
            # constructing the foldmap is expensive, so don't do it for the
            # common case where files is ['.']
            if normalize and ff != '.':
                nf = normalize(ff, False, True)
            else:
                nf = ff
            if nf in results:
                continue

            try:
                st = lstat(join(nf))
                kind = getkind(st.st_mode)
                if kind == dirkind:
                    if nf in dmap:
                        # file replaced by dir on disk but still in dirstate
                        results[nf] = None
                    if matchedir:
                        matchedir(nf)
                    foundadd((nf, ff))
                elif kind == regkind or kind == lnkkind:
                    results[nf] = st
                else:
                    badfn(ff, badtype(kind))
                    if nf in dmap:
                        results[nf] = None
            except OSError as inst: # nf not found on disk - it is dirstate only
                if nf in dmap: # does it exactly match a missing file?
                    results[nf] = None
                else: # does it match a missing directory?
                    if alldirs is None:
                        alldirs = util.dirs(dmap)
                    if nf in alldirs:
                        if matchedir:
                            matchedir(nf)
                        notfoundadd(nf)
                    else:
                        badfn(ff, inst.strerror)

        # Case insensitive filesystems cannot rely on lstat() failing to detect
        # a case-only rename.  Prune the stat object for any file that does not
        # match the case in the filesystem, if there are multiple files that
        # normalize to the same path.
        if match.isexact() and self._checkcase:
            normed = {}

            for f, st in results.iteritems():
                if st is None:
                    continue

                nc = util.normcase(f)
                paths = normed.get(nc)

                if paths is None:
                    paths = set()
                    normed[nc] = paths

                paths.add(f)

            for norm, paths in normed.iteritems():
                if len(paths) > 1:
                    for path in paths:
                        folded = self._discoverpath(path, norm, True, None,
                                                    self._dirfoldmap)
                        if path != folded:
                            results[path] = None

        return results, dirsfound, dirsnotfound
936 960
    def walk(self, match, subrepos, unknown, ignored, full=True):
        '''
        Walk recursively through the directory tree, finding all files
        matched by match.

        If full is False, maybe skip some known-clean files.

        Return a dict mapping filename to stat-like object (either
        mercurial.osutil.stat instance or return value of os.stat()).

        A value of None in the dict marks a file that is known to the
        dirstate but is missing (or filtered out) on disk.
        '''
        # full is a flag that extensions that hook into walk can use -- this
        # implementation doesn't use it at all. This satisfies the contract
        # because we only guarantee a "maybe".

        # Select ignore predicates up front so the hot loops below stay
        # branch-free on this decision.
        if ignored:
            ignore = util.never
            dirignore = util.never
        elif unknown:
            ignore = self._ignore
            dirignore = self._dirignore
        else:
            # if not unknown and not ignored, drop dir recursion and step 2
            ignore = util.always
            dirignore = util.always

        # Hoist frequently-used attributes and functions into locals; these
        # are consulted once per directory entry in the traversal below.
        matchfn = match.matchfn
        matchalways = match.always()
        matchtdir = match.traversedir
        dmap = self._map
        listdir = osutil.listdir
        lstat = os.lstat
        dirkind = stat.S_IFDIR
        regkind = stat.S_IFREG
        lnkkind = stat.S_IFLNK
        join = self._join

        exact = skipstep3 = False
        if match.isexact(): # match.exact
            exact = True
            dirignore = util.always # skip step 2
        elif match.prefix(): # match.match, no patterns
            skipstep3 = True

        if not exact and self._checkcase:
            normalize = self._normalize
            normalizefile = self._normalizefile
            skipstep3 = False
        else:
            normalize = self._normalize
            normalizefile = None

        # step 1: find all explicit files
        results, work, dirsnotfound = self._walkexplicit(match, subrepos)

        skipstep3 = skipstep3 and not (work or dirsnotfound)
        work = [d for d in work if not dirignore(d[0])]

        # step 2: visit subdirectories
        def traverse(work, alreadynormed):
            # Iterative depth-first traversal; 'work' doubles as the stack.
            wadd = work.append
            while work:
                nd = work.pop()
                skip = None
                if nd == '.':
                    nd = ''
                else:
                    skip = '.hg'
                try:
                    entries = listdir(join(nd), stat=True, skip=skip)
                except OSError as inst:
                    if inst.errno in (errno.EACCES, errno.ENOENT):
                        match.bad(self.pathto(nd), inst.strerror)
                        continue
                    raise
                for f, kind, st in entries:
                    if normalizefile:
                        # even though f might be a directory, we're only
                        # interested in comparing it to files currently in the
                        # dmap -- therefore normalizefile is enough
                        nf = normalizefile(nd and (nd + "/" + f) or f, True,
                                           True)
                    else:
                        nf = nd and (nd + "/" + f) or f
                    if nf not in results:
                        if kind == dirkind:
                            if not ignore(nf):
                                if matchtdir:
                                    matchtdir(nf)
                                wadd(nf)
                            if nf in dmap and (matchalways or matchfn(nf)):
                                results[nf] = None
                        elif kind == regkind or kind == lnkkind:
                            if nf in dmap:
                                if matchalways or matchfn(nf):
                                    results[nf] = st
                            elif ((matchalways or matchfn(nf))
                                  and not ignore(nf)):
                                # unknown file -- normalize if necessary
                                if not alreadynormed:
                                    nf = normalize(nf, False, True)
                                results[nf] = st
                        elif nf in dmap and (matchalways or matchfn(nf)):
                            # some other kind (fifo, socket, ...) replacing a
                            # tracked file -- report it as missing
                            results[nf] = None

        for nd, d in work:
            # alreadynormed means that processwork doesn't have to do any
            # expensive directory normalization
            alreadynormed = not normalize or nd == d
            traverse([d], alreadynormed)

        for s in subrepos:
            del results[s]
        del results['.hg']

        # step 3: visit remaining files from dmap
        if not skipstep3 and not exact:
            # If a dmap file is not in results yet, it was either
            # a) not matching matchfn b) ignored, c) missing, or d) under a
            # symlink directory.
            if not results and matchalways:
                visit = dmap.keys()
            else:
                visit = [f for f in dmap if f not in results and matchfn(f)]
            visit.sort()

            if unknown:
                # unknown == True means we walked all dirs under the roots
                # that wasn't ignored, and everything that matched was stat'ed
                # and is already in results.
                # The rest must thus be ignored or under a symlink.
                audit_path = pathutil.pathauditor(self._root)

                for nf in iter(visit):
                    # If a stat for the same file was already added with a
                    # different case, don't add one for this, since that would
                    # make it appear as if the file exists under both names
                    # on disk.
                    if (normalizefile and
                        normalizefile(nf, True, True) in results):
                        results[nf] = None
                    # Report ignored items in the dmap as long as they are not
                    # under a symlink directory.
                    elif audit_path.check(nf):
                        try:
                            results[nf] = lstat(join(nf))
                            # file was just ignored, no links, and exists
                        except OSError:
                            # file doesn't exist
                            results[nf] = None
                    else:
                        # It's either missing or under a symlink directory
                        # which we in this case report as missing
                        results[nf] = None
            else:
                # We may not have walked the full directory tree above,
                # so stat and check everything we missed.
                nf = iter(visit).next
                for st in util.statfiles([join(i) for i in visit]):
                    results[nf()] = st
        return results
1098 1122
    def status(self, match, subrepos, ignored, clean, unknown):
        '''Determine the status of the working copy relative to the
        dirstate and return a pair of (unsure, status), where status is of type
        scmutil.status and:

          unsure:
            files that might have been modified since the dirstate was
            written, but need to be read to be sure (size is the same
            but mtime differs)
          status.modified:
            files that have definitely been modified since the dirstate
            was written (different size or mode)
          status.clean:
            files that have definitely not been modified since the
            dirstate was written

        The listignored/listclean/listunknown flags only control which
        categories are populated; classification itself is driven by the
        per-file dirstate entry ('n'/'m'/'a'/'r') plus the on-disk stat.
        '''
        # Keep the incoming flags under distinct names; the original names
        # are reused below for the result lists.
        listignored, listclean, listunknown = ignored, clean, unknown
        lookup, modified, added, unknown, ignored = [], [], [], [], []
        removed, deleted, clean = [], [], []

        # Bind bound methods/attributes to locals -- this loop runs once per
        # file in the working copy.
        dmap = self._map
        ladd = lookup.append            # aka "unsure"
        madd = modified.append
        aadd = added.append
        uadd = unknown.append
        iadd = ignored.append
        radd = removed.append
        dadd = deleted.append
        cadd = clean.append
        mexact = match.exact
        dirignore = self._dirignore
        checkexec = self._checkexec
        copymap = self._copymap
        lastnormaltime = self._lastnormaltime

        # We need to do full walks when either
        # - we're listing all clean files, or
        # - match.traversedir does something, because match.traversedir should
        #   be called for every dir in the working dir
        full = listclean or match.traversedir is not None
        for fn, st in self.walk(match, subrepos, listunknown, listignored,
                                full=full).iteritems():
            if fn not in dmap:
                # file on disk (or explicitly requested) but not tracked
                if (listignored or mexact(fn)) and dirignore(fn):
                    if listignored:
                        iadd(fn)
                else:
                    uadd(fn)
                continue

            # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
            # written like that for performance reasons. dmap[fn] is not a
            # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
            # opcode has fast paths when the value to be unpacked is a tuple or
            # a list, but falls back to creating a full-fledged iterator in
            # general. That is much slower than simply accessing and storing the
            # tuple members one by one.
            t = dmap[fn]
            state = t[0]
            mode = t[1]
            size = t[2]
            time = t[3]

            if not st and state in "nma":
                # tracked but missing from disk
                dadd(fn)
            elif state == 'n':
                if (size >= 0 and
                    ((size != st.st_size and size != st.st_size & _rangemask)
                     or ((mode ^ st.st_mode) & 0o100 and checkexec))
                    or size == -2 # other parent
                    or fn in copymap):
                    madd(fn)
                elif time != st.st_mtime and time != st.st_mtime & _rangemask:
                    ladd(fn)
                elif st.st_mtime == lastnormaltime:
                    # fn may have just been marked as normal and it may have
                    # changed in the same second without changing its size.
                    # This can happen if we quickly do multiple commits.
                    # Force lookup, so we don't miss such a racy file change.
                    ladd(fn)
                elif listclean:
                    cadd(fn)
            elif state == 'm':
                madd(fn)
            elif state == 'a':
                aadd(fn)
            elif state == 'r':
                radd(fn)

        return (lookup, scmutil.status(modified, added, removed, deleted,
                                       unknown, ignored, clean))
1190 1214
1191 1215 def matches(self, match):
1192 1216 '''
1193 1217 return files in the dirstate (in whatever state) filtered by match
1194 1218 '''
1195 1219 dmap = self._map
1196 1220 if match.always():
1197 1221 return dmap.keys()
1198 1222 files = match.files()
1199 1223 if match.isexact():
1200 1224 # fast path -- filter the other way around, since typically files is
1201 1225 # much smaller than dmap
1202 1226 return [f for f in files if f in dmap]
1203 1227 if match.prefix() and all(fn in dmap for fn in files):
1204 1228 # fast path -- all the values are known to be files, so just return
1205 1229 # that
1206 1230 return list(files)
1207 1231 return [f for f in dmap if match(f)]
1208 1232
1209 1233 def _actualfilename(self, tr):
1210 1234 if tr:
1211 1235 return self._pendingfilename
1212 1236 else:
1213 1237 return self._filename
1214 1238
    def savebackup(self, tr, suffix='', prefix=''):
        '''Save current dirstate into backup file with suffix.

        The backup is named prefix + self._filename + suffix; at least one
        of prefix/suffix must be non-empty.  tr is the active transaction
        (or None); when given, the pending dirstate file is registered with
        it for cleanup on failure.
        '''
        assert len(suffix) > 0 or len(prefix) > 0
        filename = self._actualfilename(tr)

        # use '_writedirstate' instead of 'write' to write changes certainly,
        # because the latter omits writing out if transaction is running.
        # output file will be used to create backup of dirstate at this point.
        if self._dirty or not self._opener.exists(filename):
            self._writedirstate(self._opener(filename, "w", atomictemp=True,
                                             checkambig=True))

        if tr:
            # ensure that subsequent tr.writepending returns True for
            # changes written out above, even if dirstate is never
            # changed after this
            tr.addfilegenerator('dirstate', (self._filename,),
                                self._writedirstate, location='plain')

            # ensure that pending file written above is unlinked at
            # failure, even if tr.writepending isn't invoked until the
            # end of this transaction
            tr.registertmp(filename, location='plain')

        backupname = prefix + self._filename + suffix
        assert backupname != filename
        if self._opener.exists(backupname):
            self._opener.unlink(backupname)
        # hardlink backup is okay because _writedirstate is always called
        # with an "atomictemp=True" file.
        util.copyfile(self._opener.join(filename),
                      self._opener.join(backupname), hardlink=True)
1247 1271
    def restorebackup(self, tr, suffix='', prefix=''):
        '''Restore dirstate by backup file with suffix.

        Moves the backup named prefix + self._filename + suffix back into
        place as the live (or pending, inside a transaction) dirstate file.
        At least one of prefix/suffix must be non-empty.
        '''
        assert len(suffix) > 0 or len(prefix) > 0
        # this "invalidate()" prevents "wlock.release()" from writing
        # changes of dirstate out after restoring from backup file
        self.invalidate()
        filename = self._actualfilename(tr)
        # using self._filename to avoid having "pending" in the backup filename
        self._opener.rename(prefix + self._filename + suffix, filename,
                            checkambig=True)
1258 1282
1259 1283 def clearbackup(self, tr, suffix='', prefix=''):
1260 1284 '''Clear backup file with suffix'''
1261 1285 assert len(suffix) > 0 or len(prefix) > 0
1262 1286 # using self._filename to avoid having "pending" in the backup filename
1263 1287 self._opener.unlink(prefix + self._filename + suffix)
@@ -1,2925 +1,2966 b''
1 1 /*
2 2 parsers.c - efficient content parsing
3 3
4 4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #include <Python.h>
11 11 #include <ctype.h>
12 12 #include <stddef.h>
13 13 #include <string.h>
14 14
15 15 #include "util.h"
16 16 #include "bitmanipulation.h"
17 17
18 18 #ifdef IS_PY3K
19 19 /* The mapping of Python types is meant to be temporary to get Python
20 20 * 3 to compile. We should remove this once Python 3 support is fully
21 21 * supported and proper types are used in the extensions themselves. */
22 22 #define PyInt_Type PyLong_Type
23 23 #define PyInt_Check PyLong_Check
24 24 #define PyInt_FromLong PyLong_FromLong
25 25 #define PyInt_FromSsize_t PyLong_FromSsize_t
26 26 #define PyInt_AS_LONG PyLong_AS_LONG
27 27 #define PyInt_AsLong PyLong_AsLong
28 28 #endif
29 29
static char *versionerrortext = "Python minor version mismatch";

/* Map an ASCII byte to its hexadecimal digit value; -1 marks non-hex bytes. */
static int8_t hextable[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/* Identity map over 7-bit ASCII, except A-Z are folded to a-z. */
static char lowertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40',
	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
	'\x78', '\x79', '\x7a', /* X-Z */
	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};

/* Identity map over 7-bit ASCII, except a-z are folded to A-Z. */
static char uppertable[128] = {
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	'\x60',
	        '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
	'\x58', '\x59', '\x5a', /* x-z */
	                        '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
};
92 92
/*
 * Decode the hex digit at p[off].  Returns its value (0-15); for a
 * non-hex byte, sets a pending ValueError and returns 0 -- callers that
 * care must check PyErr_Occurred() to tell the two apart.
 */
static inline int hexdigit(const char *p, Py_ssize_t off)
{
	int8_t val = hextable[(unsigned char)p[off]];

	if (val >= 0) {
		return val;
	}

	PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
	return 0;
}
104 104
/*
 * Turn a hex-encoded string into binary.
 *
 * Returns a new bytes object of len / 2 bytes, or NULL on allocation
 * failure.  A non-hex input byte leaves a ValueError pending (set by
 * hexdigit) while this function still returns the partially-decoded
 * object, so callers should check for a pending error.
 * NOTE(review): an odd len would read one byte past the input and write
 * one byte past the allocation; all callers appear to pass even lengths
 * (e.g. 40-char nodes) -- verify before reusing with other inputs.
 */
PyObject *unhexlify(const char *str, int len)
{
	PyObject *ret;
	char *d;
	int i;

	ret = PyBytes_FromStringAndSize(NULL, len / 2);

	if (!ret)
		return NULL;

	d = PyBytes_AsString(ret);

	for (i = 0; i < len;) {
		int hi = hexdigit(str, i++);
		int lo = hexdigit(str, i++);
		*d++ = (hi << 4) | lo;
	}

	return ret;
}
129 129
/*
 * Apply a 128-entry byte-translation table to an ASCII bytes object,
 * returning a new bytes object.  If a byte has the high bit set, the
 * input is not ASCII: when fallback_fn is non-NULL it is called with the
 * original object and its result returned; otherwise a
 * UnicodeDecodeError is raised.  Returns NULL on error.
 */
static inline PyObject *_asciitransform(PyObject *str_obj,
                                        const char table[128],
                                        PyObject *fallback_fn)
{
	char *str, *newstr;
	Py_ssize_t i, len;
	PyObject *newobj = NULL;
	PyObject *ret = NULL;

	str = PyBytes_AS_STRING(str_obj);
	len = PyBytes_GET_SIZE(str_obj);

	newobj = PyBytes_FromStringAndSize(NULL, len);
	if (!newobj)
		goto quit;

	newstr = PyBytes_AS_STRING(newobj);

	for (i = 0; i < len; i++) {
		char c = str[i];
		if (c & 0x80) {
			if (fallback_fn != NULL) {
				ret = PyObject_CallFunctionObjArgs(fallback_fn,
					str_obj, NULL);
			} else {
				PyObject *err = PyUnicodeDecodeError_Create(
					"ascii", str, len, i, (i + 1),
					"unexpected code byte");
				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
				Py_XDECREF(err);
			}
			goto quit;
		}
		newstr[i] = table[(unsigned char)c];
	}

	ret = newobj;
	Py_INCREF(ret);
quit:
	/* on success this drops the extra local reference; on failure it
	   frees the partially-built object */
	Py_XDECREF(newobj);
	return ret;
}
172 172
/* asciilower(bytes) -> bytes: ASCII-only lowercasing; raises on non-ASCII. */
static PyObject *asciilower(PyObject *self, PyObject *args)
{
	PyObject *str_obj;
	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
		return NULL;
	return _asciitransform(str_obj, lowertable, NULL);
}
180 180
/* asciiupper(bytes) -> bytes: ASCII-only uppercasing; raises on non-ASCII. */
static PyObject *asciiupper(PyObject *self, PyObject *args)
{
	PyObject *str_obj;
	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
		return NULL;
	return _asciitransform(str_obj, uppertable, NULL);
}
188 188
/* Create a dict pre-sized to hold expected_size entries without resizing. */
static inline PyObject *_dict_new_presized(Py_ssize_t expected_size)
{
	/* _PyDict_NewPresized expects a minused parameter, but it actually
	   creates a dictionary that's the nearest power of two bigger than the
	   parameter. For example, with the initial minused = 1000, the
	   dictionary created has size 1024. Of course in a lot of cases that
	   can be greater than the maximum load factor Python's dict object
	   expects (= 2/3), so as soon as we cross the threshold we'll resize
	   anyway. So create a dictionary that's at least 3/2 the size. */
	return _PyDict_NewPresized(((1 + expected_size) / 2) * 3);
}
200 200
/* Python-level wrapper: make_presized_dict(n) -> pre-sized empty dict. */
static PyObject *dict_new_presized(PyObject *self, PyObject *args)
{
	Py_ssize_t expected_size;

	if (!PyArg_ParseTuple(args, "n:make_presized_dict", &expected_size))
		return NULL;

	return _dict_new_presized(expected_size);
}
210 210
/*
 * Build the {normalized name: real name} fold map for a dirstate dmap.
 *
 * Entries in state 'r' (removed) are skipped.  Depending on the
 * normcase spec, names are case-folded via the ASCII tables (with
 * normcase_fallback handling non-ASCII) or entirely via the Python
 * fallback function.  Returns a new dict, or NULL with an exception set.
 */
static PyObject *make_file_foldmap(PyObject *self, PyObject *args)
{
	PyObject *dmap, *spec_obj, *normcase_fallback;
	PyObject *file_foldmap = NULL;
	enum normcase_spec spec;
	PyObject *k, *v;
	dirstateTupleObject *tuple;
	Py_ssize_t pos = 0;
	const char *table;

	if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
			      &PyDict_Type, &dmap,
			      &PyInt_Type, &spec_obj,
			      &PyFunction_Type, &normcase_fallback))
		goto quit;

	spec = (int)PyInt_AS_LONG(spec_obj);
	switch (spec) {
	case NORMCASE_LOWER:
		table = lowertable;
		break;
	case NORMCASE_UPPER:
		table = uppertable;
		break;
	case NORMCASE_OTHER:
		table = NULL;
		break;
	default:
		PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
		goto quit;
	}

	/* Add some more entries to deal with additions outside this
	   function. */
	file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
	if (file_foldmap == NULL)
		goto quit;

	while (PyDict_Next(dmap, &pos, &k, &v)) {
		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
					"expected a dirstate tuple");
			goto quit;
		}

		tuple = (dirstateTupleObject *)v;
		if (tuple->state != 'r') {
			PyObject *normed;
			if (table != NULL) {
				normed = _asciitransform(k, table,
					normcase_fallback);
			} else {
				normed = PyObject_CallFunctionObjArgs(
					normcase_fallback, k, NULL);
			}

			if (normed == NULL)
				goto quit;
			if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
				Py_DECREF(normed);
				goto quit;
			}
			Py_DECREF(normed);
		}
	}
	return file_foldmap;
quit:
	Py_XDECREF(file_foldmap);
	return NULL;
}
281 281
/*
 * This code assumes that a manifest is stitched together with newline
 * ('\n') characters.
 *
 * Each entry is "<filename>\0<40-hex-node>[flags]\n"; filenames go into
 * mfdict mapped to their binary node, and any trailing flag characters
 * go into fdict.  Returns None on success, NULL with an exception set
 * on malformed input or allocation failure.
 */
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
	PyObject *mfdict, *fdict;
	char *str, *start, *end;
	int len;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
			      &PyDict_Type, &mfdict,
			      &PyDict_Type, &fdict,
			      &str, &len))
		goto quit;

	start = str;
	end = str + len;
	while (start < end) {
		PyObject *file = NULL, *node = NULL;
		PyObject *flags = NULL;
		char *zero = NULL, *newline = NULL;
		ptrdiff_t nlen;

		zero = memchr(start, '\0', end - start);
		if (!zero) {
			PyErr_SetString(PyExc_ValueError,
					"manifest entry has no separator");
			goto quit;
		}

		newline = memchr(zero + 1, '\n', end - (zero + 1));
		if (!newline) {
			PyErr_SetString(PyExc_ValueError,
					"manifest contains trailing garbage");
			goto quit;
		}

		file = PyBytes_FromStringAndSize(start, zero - start);

		if (!file)
			goto bail;

		nlen = newline - zero - 1;

		/* node is at most 40 hex chars; anything beyond is flags */
		node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
		if (!node)
			goto bail;

		if (nlen > 40) {
			flags = PyBytes_FromStringAndSize(zero + 41,
							   nlen - 40);
			if (!flags)
				goto bail;

			if (PyDict_SetItem(fdict, file, flags) == -1)
				goto bail;
		}

		if (PyDict_SetItem(mfdict, file, node) == -1)
			goto bail;

		start = newline + 1;

		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		continue;
	bail:
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		goto quit;
	}

	Py_INCREF(Py_None);
	return Py_None;
quit:
	return NULL;
}
362 362
/* Allocate a new dirstate tuple; returns NULL on allocation failure. */
static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
                                                       int size, int mtime)
{
	dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
	                                      &dirstateTupleType);
	if (!t)
		return NULL;
	t->state = state;
	t->mode = mode;
	t->size = size;
	t->mtime = mtime;
	return t;
}
376 376
/* tp_new for dirstate_tuple: dirstate_tuple(state, mode, size, mtime). */
static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
                                    PyObject *kwds)
{
	/* We do all the initialization here and not a tp_init function because
	 * dirstate_tuple is immutable. */
	dirstateTupleObject *t;
	char state;
	int size, mode, mtime;
	if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
		return NULL;

	t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
	if (!t)
		return NULL;
	t->state = state;
	t->mode = mode;
	t->size = size;
	t->mtime = mtime;

	return (PyObject *)t;
}
398 398
/* tp_dealloc: no owned references, so plain object free suffices. */
static void dirstate_tuple_dealloc(PyObject *o)
{
	PyObject_Del(o);
}
403 403
/* sq_length: a dirstate tuple always has exactly 4 items. */
static Py_ssize_t dirstate_tuple_length(PyObject *o)
{
	return 4;
}
408 408
/* sq_item: index 0 = state (1-byte string), 1 = mode, 2 = size, 3 = mtime. */
static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
{
	dirstateTupleObject *t = (dirstateTupleObject *)o;
	switch (i) {
	case 0:
		return PyBytes_FromStringAndSize(&t->state, 1);
	case 1:
		return PyInt_FromLong(t->mode);
	case 2:
		return PyInt_FromLong(t->size);
	case 3:
		return PyInt_FromLong(t->mtime);
	default:
		PyErr_SetString(PyExc_IndexError, "index out of range");
		return NULL;
	}
}
426 426
/* Sequence protocol: only length and item access are supported. */
static PySequenceMethods dirstate_tuple_sq = {
	dirstate_tuple_length,     /* sq_length */
	0,                         /* sq_concat */
	0,                         /* sq_repeat */
	dirstate_tuple_item,       /* sq_item */
	0,                         /* sq_ass_item */
	0,                         /* sq_contains */
	0,                         /* sq_inplace_concat */
	0                          /* sq_inplace_repeat */
};
437 437
/* Immutable 4-item (state, mode, size, mtime) tuple-like type used as the
   per-file dirstate entry; cheaper than a real Python tuple. */
PyTypeObject dirstateTupleType = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"dirstate_tuple",           /* tp_name */
	sizeof(dirstateTupleObject),/* tp_basicsize */
	0,                          /* tp_itemsize */
	(destructor)dirstate_tuple_dealloc, /* tp_dealloc */
	0,                          /* tp_print */
	0,                          /* tp_getattr */
	0,                          /* tp_setattr */
	0,                          /* tp_compare */
	0,                          /* tp_repr */
	0,                          /* tp_as_number */
	&dirstate_tuple_sq,         /* tp_as_sequence */
	0,                          /* tp_as_mapping */
	0,                          /* tp_hash  */
	0,                          /* tp_call */
	0,                          /* tp_str */
	0,                          /* tp_getattro */
	0,                          /* tp_setattro */
	0,                          /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,         /* tp_flags */
	"dirstate tuple",           /* tp_doc */
	0,                          /* tp_traverse */
	0,                          /* tp_clear */
	0,                          /* tp_richcompare */
	0,                          /* tp_weaklistoffset */
	0,                          /* tp_iter */
	0,                          /* tp_iternext */
	0,                          /* tp_methods */
	0,                          /* tp_members */
	0,                          /* tp_getset */
	0,                          /* tp_base */
	0,                          /* tp_dict */
	0,                          /* tp_descr_get */
	0,                          /* tp_descr_set */
	0,                          /* tp_dictoffset */
	0,                          /* tp_init */
	0,                          /* tp_alloc */
	dirstate_tuple_new,         /* tp_new */
};
478 478
/*
 * Parse an on-disk dirstate blob.
 *
 * Layout: 40 bytes of parent nodes, then repeated 17-byte records
 * (state byte, mode, size, mtime, filename length -- all big-endian
 * 32-bit after the state byte) each followed by the filename, which may
 * embed "\0<copysource>".  Fills dmap {name: dirstate tuple} and cmap
 * {name: copy source}; returns the (p1, p2) parents pair, or NULL with
 * an exception set on malformed/truncated input or allocation failure.
 *
 * Fix: the result of make_dirstate_tuple() was previously passed to
 * PyDict_SetItem without a NULL check; on allocation failure that is
 * undefined behavior.  We now bail out explicitly.
 */
static PyObject *parse_dirstate(PyObject *self, PyObject *args)
{
	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
	char state, *cur, *str, *cpos;
	int mode, size, mtime;
	unsigned int flen, len, pos = 40;
	int readlen;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
			      &PyDict_Type, &dmap,
			      &PyDict_Type, &cmap,
			      &str, &readlen))
		goto quit;

	len = readlen;

	/* read parents */
	if (len < 40) {
		PyErr_SetString(
			PyExc_ValueError, "too little data for parents");
		goto quit;
	}

	parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
	if (!parents)
		goto quit;

	/* read filenames */
	while (pos >= 40 && pos < len) {
		if (pos + 17 > len) {
			PyErr_SetString(PyExc_ValueError,
					"overflow in dirstate");
			goto quit;
		}
		cur = str + pos;
		/* unpack header */
		state = *cur;
		mode = getbe32(cur + 1);
		size = getbe32(cur + 5);
		mtime = getbe32(cur + 9);
		flen = getbe32(cur + 13);
		pos += 17;
		cur += 17;
		if (flen > len - pos) {
			PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
			goto quit;
		}

		entry = (PyObject *)make_dirstate_tuple(state, mode, size,
							mtime);
		if (!entry)
			goto quit;
		cpos = memchr(cur, 0, flen);
		if (cpos) {
			/* "name\0copysource" -- record the copy in cmap */
			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
			cname = PyBytes_FromStringAndSize(cpos + 1,
							   flen - (cpos - cur) - 1);
			if (!fname || !cname ||
			    PyDict_SetItem(cmap, fname, cname) == -1 ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
			Py_DECREF(cname);
		} else {
			fname = PyBytes_FromStringAndSize(cur, flen);
			if (!fname ||
			    PyDict_SetItem(dmap, fname, entry) == -1)
				goto quit;
		}
		Py_DECREF(fname);
		Py_DECREF(entry);
		fname = cname = entry = NULL;
		pos += flen;
	}

	ret = parents;
	Py_INCREF(ret);
quit:
	Py_XDECREF(fname);
	Py_XDECREF(cname);
	Py_XDECREF(entry);
	Py_XDECREF(parents);
	return ret;
}
561 561
/*
 * Build a set of non-normal and other parent entries from the dirstate dmap
 *
 * Returns a new (nonnormal, otherparent) pair of sets: nonnormal holds
 * names whose state is not 'n' or whose mtime is -1; otherparent holds
 * 'n' entries with size -2 (content taken from the merge's other
 * parent).  Returns NULL with an exception set on error.
 *
 * Fix: Py_BuildValue's "O" format takes its own reference to each set,
 * so the local references must be dropped on the success path too --
 * previously both sets leaked on every call.
 */
static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) {
	PyObject *dmap, *fname, *v;
	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
	Py_ssize_t pos;

	if (!PyArg_ParseTuple(args, "O!:nonnormalentries",
			      &PyDict_Type, &dmap))
		goto bail;

	nonnset = PySet_New(NULL);
	if (nonnset == NULL)
		goto bail;

	otherpset = PySet_New(NULL);
	if (otherpset == NULL)
		goto bail;

	pos = 0;
	while (PyDict_Next(dmap, &pos, &fname, &v)) {
		dirstateTupleObject *t;
		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
					"expected a dirstate tuple");
			goto bail;
		}
		t = (dirstateTupleObject *)v;

		/* 'n' with size -2 marks content from the other merge parent */
		if (t->state == 'n' && t->size == -2) {
			if (PySet_Add(otherpset, fname) == -1) {
				goto bail;
			}
		}

		if (t->state == 'n' && t->mtime != -1)
			continue;
		if (PySet_Add(nonnset, fname) == -1)
			goto bail;
	}

	result = Py_BuildValue("(OO)", nonnset, otherpset);
	if (result == NULL)
		goto bail;
	/* the tuple owns its own references now; release ours */
	Py_DECREF(nonnset);
	Py_DECREF(otherpset);
	return result;
bail:
	Py_XDECREF(nonnset);
	Py_XDECREF(otherpset);
	Py_XDECREF(result);
	return NULL;
}
599 614
600 615 /*
616 * Build a set of non-normal entries from the dirstate dmap
617 */
618 static PyObject *nonnormalentries(PyObject *self, PyObject *args)
619 {
620 PyObject *nonnset = NULL, *combined = NULL;
621
622 combined = nonnormalotherparententries(self, args);
623 if (!combined) {
624 return NULL;
625 }
626
627 nonnset = PyTuple_GetItem(combined, 0);
628 if (!nonnset) {
629 Py_DECREF(combined);
630 return NULL;
631 }
632
633 Py_INCREF(nonnset);
634 Py_DECREF(combined);
635 return nonnset;
636 }
637
/*
 * Efficiently pack a dirstate object into its on-disk format.
 *
 * Arguments: (dmap dict, copymap dict, parents 2-sequence, now int).
 * Returns a new bytes object: 40 bytes of parent hashes followed by one
 * record per dmap entry.  As a side effect, entries whose mtime equals
 * 'now' are rewritten in dmap with mtime -1 (see
 * pure/parsers.py:pack_dirstate for the race this avoids).
 */
static PyObject *pack_dirstate(PyObject *self, PyObject *args)
{
	PyObject *packobj = NULL;
	PyObject *map, *copymap, *pl, *mtime_unset = NULL;
	Py_ssize_t nbytes, pos, l;
	PyObject *k, *v = NULL, *pn;
	char *p, *s;
	int now;

	if (!PyArg_ParseTuple(args, "O!O!Oi:pack_dirstate",
			      &PyDict_Type, &map, &PyDict_Type, &copymap,
			      &pl, &now))
		return NULL;

	if (!PySequence_Check(pl) || PySequence_Size(pl) != 2) {
		PyErr_SetString(PyExc_TypeError, "expected 2-element sequence");
		return NULL;
	}

	/* Figure out how much we need to allocate. */
	/* 40 = two 20-byte parent hashes; per entry: 17 = 1 state byte +
	 * three be32 fields + 4-byte name length. */
	for (nbytes = 40, pos = 0; PyDict_Next(map, &pos, &k, &v);) {
		PyObject *c;
		if (!PyBytes_Check(k)) {
			PyErr_SetString(PyExc_TypeError, "expected string key");
			goto bail;
		}
		nbytes += PyBytes_GET_SIZE(k) + 17;
		c = PyDict_GetItem(copymap, k);
		if (c) {
			if (!PyBytes_Check(c)) {
				PyErr_SetString(PyExc_TypeError,
						"expected string key");
				goto bail;
			}
			/* copy source is stored as "name\0source" */
			nbytes += PyBytes_GET_SIZE(c) + 1;
		}
	}

	packobj = PyBytes_FromStringAndSize(NULL, nbytes);
	if (packobj == NULL)
		goto bail;

	p = PyBytes_AS_STRING(packobj);

	/* Header: the two parent hashes, 20 bytes each.
	 * NOTE(review): PySequence_ITEM returns a new reference; pn is
	 * never released on either path — looks like a small per-call
	 * leak, confirm upstream. */
	pn = PySequence_ITEM(pl, 0);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;
	pn = PySequence_ITEM(pl, 1);
	if (PyBytes_AsStringAndSize(pn, &s, &l) == -1 || l != 20) {
		PyErr_SetString(PyExc_TypeError, "expected a 20-byte hash");
		goto bail;
	}
	memcpy(p, s, l);
	p += 20;

	for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
		dirstateTupleObject *tuple;
		char state;
		int mode, size, mtime;
		Py_ssize_t len, l;
		PyObject *o;
		char *t;

		if (!dirstate_tuple_check(v)) {
			PyErr_SetString(PyExc_TypeError,
					"expected a dirstate tuple");
			goto bail;
		}
		tuple = (dirstateTupleObject *)v;

		state = tuple->state;
		mode = tuple->mode;
		size = tuple->size;
		mtime = tuple->mtime;
		if (state == 'n' && mtime == now) {
			/* See pure/parsers.py:pack_dirstate for why we do
			 * this. */
			mtime = -1;
			mtime_unset = (PyObject *)make_dirstate_tuple(
				state, mode, size, mtime);
			if (!mtime_unset)
				goto bail;
			if (PyDict_SetItem(map, k, mtime_unset) == -1)
				goto bail;
			Py_DECREF(mtime_unset);
			mtime_unset = NULL;
		}
		/* record layout: state, mode, size, mtime, namelen, name */
		*p++ = state;
		putbe32((uint32_t)mode, p);
		putbe32((uint32_t)size, p + 4);
		putbe32((uint32_t)mtime, p + 8);
		t = p + 12; /* remember where the length field goes */
		p += 16;
		len = PyBytes_GET_SIZE(k);
		memcpy(p, PyBytes_AS_STRING(k), len);
		p += len;
		o = PyDict_GetItem(copymap, k);
		if (o) {
			/* append "\0" + copy source; length covers both */
			*p++ = '\0';
			l = PyBytes_GET_SIZE(o);
			memcpy(p, PyBytes_AS_STRING(o), l);
			p += l;
			len += l + 1;
		}
		putbe32((uint32_t)len, t);
	}

	/* Sanity check: we must have filled exactly what we allocated. */
	pos = p - PyBytes_AS_STRING(packobj);
	if (pos != nbytes) {
		PyErr_Format(PyExc_SystemError, "bad dirstate size: %ld != %ld",
                             (long)pos, (long)nbytes);
		goto bail;
	}

	return packobj;
bail:
	Py_XDECREF(mtime_unset);
	Py_XDECREF(packobj);
	/* NOTE(review): v holds a reference borrowed from PyDict_Next;
	 * decrefing it here looks like a refcount underflow on the error
	 * path — confirm against upstream history before changing. */
	Py_XDECREF(v);
	return NULL;
}
728 766
/*
 * A base-16 trie for fast node->rev mapping.
 *
 * Positive value is index of the next node in the trie
 * Negative value is a leaf: -(rev + 1)
 * Zero is empty
 */
typedef struct {
	/* one slot per hex nybble of the node hash */
	int children[16];
} nodetree;
739 777
/*
 * This class has two behaviors.
 *
 * When used in a list-like way (with integer keys), we decode an
 * entry in a RevlogNG index file on demand. Our last entry is a
 * sentinel, always a nullid.  We have limited support for
 * integer-keyed insert and delete, only at elements right before the
 * sentinel.
 *
 * With string keys, we lazily perform a reverse mapping from node to
 * rev, using a base-16 trie.
 */
typedef struct {
	PyObject_HEAD
	/* Type-specific fields go here. */
	PyObject *data;        /* raw bytes of index */
	Py_buffer buf;         /* buffer of data */
	PyObject **cache;      /* cached tuples */
	const char **offsets;  /* populated on demand */
	Py_ssize_t raw_length; /* original number of elements */
	Py_ssize_t length;     /* current number of elements */
	PyObject *added;       /* populated on demand */
	PyObject *headrevs;    /* cache, invalidated on changes */
	PyObject *filteredrevs;/* filtered revs set */
	nodetree *nt;          /* base-16 trie */
	unsigned ntlength;          /* # nodes in use */
	unsigned ntcapacity;        /* # nodes allocated */
	int ntdepth;                /* maximum depth of tree */
	int ntsplits;               /* # splits performed */
	int ntrev;                  /* last rev scanned */
	int ntlookups;              /* # lookups */
	int ntmisses;               /* # lookups that miss the cache */
	int inlined;           /* whether data is interleaved with revlog data */
} indexObject;
774 812
775 813 static Py_ssize_t index_length(const indexObject *self)
776 814 {
777 815 if (self->added == NULL)
778 816 return self->length;
779 817 return self->length + PyList_GET_SIZE(self->added);
780 818 }
781 819
/* the index tuple returned for the nullid sentinel entry */
static PyObject *nullentry;
/* zero-initialized by C static storage: the 20-byte null hash */
static const char nullid[20];

static Py_ssize_t inline_scan(indexObject *self, const char **offsets);

/* 'K' (unsigned long long) vs 'k' (unsigned long) for the 64-bit
 * offset_flags field, depending on the platform's size of long */
#if LONG_MAX == 0x7fffffffL
static char *tuple_format = "Kiiiiiis#";
#else
static char *tuple_format = "kiiiiiis#";
#endif

/* A RevlogNG v1 index entry is 64 bytes long. */
static const long v1_hdrsize = 64;
795 833
796 834 /*
797 835 * Return a pointer to the beginning of a RevlogNG record.
798 836 */
799 837 static const char *index_deref(indexObject *self, Py_ssize_t pos)
800 838 {
801 839 if (self->inlined && pos > 0) {
802 840 if (self->offsets == NULL) {
803 841 self->offsets = malloc(self->raw_length *
804 842 sizeof(*self->offsets));
805 843 if (self->offsets == NULL)
806 844 return (const char *)PyErr_NoMemory();
807 845 inline_scan(self, self->offsets);
808 846 }
809 847 return self->offsets[pos];
810 848 }
811 849
812 850 return (const char *)(self->buf.buf) + pos * v1_hdrsize;
813 851 }
814 852
815 853 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
816 854 int *ps, int maxrev)
817 855 {
818 856 if (rev >= self->length - 1) {
819 857 PyObject *tuple = PyList_GET_ITEM(self->added,
820 858 rev - self->length + 1);
821 859 ps[0] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 5));
822 860 ps[1] = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(tuple, 6));
823 861 } else {
824 862 const char *data = index_deref(self, rev);
825 863 ps[0] = getbe32(data + 24);
826 864 ps[1] = getbe32(data + 28);
827 865 }
828 866 /* If index file is corrupted, ps[] may point to invalid revisions. So
829 867 * there is a risk of buffer overflow to trust them unconditionally. */
830 868 if (ps[0] > maxrev || ps[1] > maxrev) {
831 869 PyErr_SetString(PyExc_ValueError, "parent out of range");
832 870 return -1;
833 871 }
834 872 return 0;
835 873 }
836 874
837 875
/*
 * RevlogNG format (all in big endian, data may be inlined):
 *    6 bytes: offset
 *    2 bytes: flags
 *    4 bytes: compressed length
 *    4 bytes: uncompressed length
 *    4 bytes: base revision
 *    4 bytes: link revision
 *    4 bytes: parent 1 revision
 *    4 bytes: parent 2 revision
 *   32 bytes: nodeid (only 20 bytes used)
 *
 * Returns a new reference to the 8-tuple for entry 'pos', decoding and
 * caching it on first access.
 */
static PyObject *index_get(indexObject *self, Py_ssize_t pos)
{
	uint64_t offset_flags;
	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
	const char *c_node_id;
	const char *data;
	Py_ssize_t length = index_length(self);
	PyObject *entry;

	/* support negative indexing, Python-style */
	if (pos < 0)
		pos += length;

	if (pos < 0 || pos >= length) {
		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
		return NULL;
	}

	/* last slot is always the nullid sentinel */
	if (pos == length - 1) {
		Py_INCREF(nullentry);
		return nullentry;
	}

	/* in-memory appended entries are stored ready-made */
	if (pos >= self->length - 1) {
		PyObject *obj;
		obj = PyList_GET_ITEM(self->added, pos - self->length + 1);
		Py_INCREF(obj);
		return obj;
	}

	/* the tuple cache is allocated lazily, one slot per raw entry */
	if (self->cache) {
		if (self->cache[pos]) {
			Py_INCREF(self->cache[pos]);
			return self->cache[pos];
		}
	} else {
		self->cache = calloc(self->raw_length, sizeof(PyObject *));
		if (self->cache == NULL)
			return PyErr_NoMemory();
	}

	data = index_deref(self, pos);
	if (data == NULL)
		return NULL;

	offset_flags = getbe32(data + 4);
	if (pos == 0) /* mask out version number for the first entry */
		offset_flags &= 0xFFFF;
	else {
		uint32_t offset_high = getbe32(data);
		offset_flags |= ((uint64_t)offset_high) << 32;
	}

	comp_len = getbe32(data + 8);
	uncomp_len = getbe32(data + 12);
	base_rev = getbe32(data + 16);
	link_rev = getbe32(data + 20);
	parent_1 = getbe32(data + 24);
	parent_2 = getbe32(data + 28);
	c_node_id = data + 32;

	entry = Py_BuildValue(tuple_format, offset_flags, comp_len,
			      uncomp_len, base_rev, link_rev,
			      parent_1, parent_2, c_node_id, 20);

	if (entry) {
		/* untrack from GC (contents are immortal scalars/bytes);
		 * the extra INCREF is the reference kept by the cache */
		PyObject_GC_UnTrack(entry);
		Py_INCREF(entry);
	}

	/* on Py_BuildValue failure this caches NULL, which simply means
	 * "not cached" on the next lookup */
	self->cache[pos] = entry;

	return entry;
}
923 961
924 962 /*
925 963 * Return the 20-byte SHA of the node corresponding to the given rev.
926 964 */
927 965 static const char *index_node(indexObject *self, Py_ssize_t pos)
928 966 {
929 967 Py_ssize_t length = index_length(self);
930 968 const char *data;
931 969
932 970 if (pos == length - 1 || pos == INT_MAX)
933 971 return nullid;
934 972
935 973 if (pos >= length)
936 974 return NULL;
937 975
938 976 if (pos >= self->length - 1) {
939 977 PyObject *tuple, *str;
940 978 tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
941 979 str = PyTuple_GetItem(tuple, 7);
942 980 return str ? PyBytes_AS_STRING(str) : NULL;
943 981 }
944 982
945 983 data = index_deref(self, pos);
946 984 return data ? data + 32 : NULL;
947 985 }
948 986
949 987 static int nt_insert(indexObject *self, const char *node, int rev);
950 988
951 989 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
952 990 {
953 991 if (PyBytes_AsStringAndSize(obj, node, nodelen) == -1)
954 992 return -1;
955 993 if (*nodelen == 20)
956 994 return 0;
957 995 PyErr_SetString(PyExc_ValueError, "20-byte hash required");
958 996 return -1;
959 997 }
960 998
/*
 * index.insert(i, entry): append an 8-tuple entry to the in-memory
 * 'added' list.  Only insertion immediately before the nullid sentinel
 * (index -1) is supported.  Keeps the node trie and invalidates the
 * headrevs cache.  Returns None.
 */
static PyObject *index_insert(indexObject *self, PyObject *args)
{
	PyObject *obj;
	char *node;
	int index;
	Py_ssize_t len, nodelen;

	if (!PyArg_ParseTuple(args, "iO", &index, &obj))
		return NULL;

	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
		PyErr_SetString(PyExc_TypeError, "8-tuple required");
		return NULL;
	}

	/* slot 7 must hold the 20-byte binary node hash */
	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
		return NULL;

	len = index_length(self);

	/* normalize negative indexes before validating */
	if (index < 0)
		index += len;

	if (index != len - 1) {
		PyErr_SetString(PyExc_IndexError,
				"insert only supported at index -1");
		return NULL;
	}

	/* 'added' is created lazily on first insert */
	if (self->added == NULL) {
		self->added = PyList_New(0);
		if (self->added == NULL)
			return NULL;
	}

	if (PyList_Append(self->added, obj) == -1)
		return NULL;

	/* only maintain the trie if it has already been built */
	if (self->nt)
		nt_insert(self, node, index);

	Py_CLEAR(self->headrevs);
	Py_RETURN_NONE;
}
1005 1043
1006 1044 static void _index_clearcaches(indexObject *self)
1007 1045 {
1008 1046 if (self->cache) {
1009 1047 Py_ssize_t i;
1010 1048
1011 1049 for (i = 0; i < self->raw_length; i++)
1012 1050 Py_CLEAR(self->cache[i]);
1013 1051 free(self->cache);
1014 1052 self->cache = NULL;
1015 1053 }
1016 1054 if (self->offsets) {
1017 1055 free(self->offsets);
1018 1056 self->offsets = NULL;
1019 1057 }
1020 1058 if (self->nt) {
1021 1059 free(self->nt);
1022 1060 self->nt = NULL;
1023 1061 }
1024 1062 Py_CLEAR(self->headrevs);
1025 1063 }
1026 1064
1027 1065 static PyObject *index_clearcaches(indexObject *self)
1028 1066 {
1029 1067 _index_clearcaches(self);
1030 1068 self->ntlength = self->ntcapacity = 0;
1031 1069 self->ntdepth = self->ntsplits = 0;
1032 1070 self->ntrev = -1;
1033 1071 self->ntlookups = self->ntmisses = 0;
1034 1072 Py_RETURN_NONE;
1035 1073 }
1036 1074
/*
 * Return a dict of diagnostic counters about the index and its node
 * trie (sizes, depth, lookup/miss counts).
 */
static PyObject *index_stats(indexObject *self)
{
	PyObject *obj = PyDict_New();
	PyObject *t = NULL;

	if (obj == NULL)
		return NULL;

/* store self-><field> into obj under the human-readable key __d;
 * on failure t (if live) is released at bail */
#define istat(__n, __d) \
	do { \
		t = PyInt_FromSsize_t(self->__n); \
		if (!t) \
			goto bail; \
		if (PyDict_SetItemString(obj, __d, t) == -1) \
			goto bail; \
		Py_DECREF(t); \
	} while (0)

	if (self->added) {
		Py_ssize_t len = PyList_GET_SIZE(self->added);
		t = PyInt_FromSsize_t(len);
		if (!t)
			goto bail;
		if (PyDict_SetItemString(obj, "index entries added", t) == -1)
			goto bail;
		Py_DECREF(t);
	}

	/* raw_length is only interesting when entries were appended */
	if (self->raw_length != self->length - 1)
		istat(raw_length, "revs on disk");
	istat(length, "revs in memory");
	istat(ntcapacity, "node trie capacity");
	istat(ntdepth, "node trie depth");
	istat(ntlength, "node trie count");
	istat(ntlookups, "node trie lookups");
	istat(ntmisses, "node trie misses");
	istat(ntrev, "node trie last rev scanned");
	istat(ntsplits, "node trie splits");

#undef istat

	return obj;

bail:
	Py_XDECREF(obj);
	Py_XDECREF(t);
	return NULL;
}
1085 1123
1086 1124 /*
1087 1125 * When we cache a list, we want to be sure the caller can't mutate
1088 1126 * the cached copy.
1089 1127 */
1090 1128 static PyObject *list_copy(PyObject *list)
1091 1129 {
1092 1130 Py_ssize_t len = PyList_GET_SIZE(list);
1093 1131 PyObject *newlist = PyList_New(len);
1094 1132 Py_ssize_t i;
1095 1133
1096 1134 if (newlist == NULL)
1097 1135 return NULL;
1098 1136
1099 1137 for (i = 0; i < len; i++) {
1100 1138 PyObject *obj = PyList_GET_ITEM(list, i);
1101 1139 Py_INCREF(obj);
1102 1140 PyList_SET_ITEM(newlist, i, obj);
1103 1141 }
1104 1142
1105 1143 return newlist;
1106 1144 }
1107 1145
1108 1146 static int check_filter(PyObject *filter, Py_ssize_t arg) {
1109 1147 if (filter) {
1110 1148 PyObject *arglist, *result;
1111 1149 int isfiltered;
1112 1150
1113 1151 arglist = Py_BuildValue("(n)", arg);
1114 1152 if (!arglist) {
1115 1153 return -1;
1116 1154 }
1117 1155
1118 1156 result = PyEval_CallObject(filter, arglist);
1119 1157 Py_DECREF(arglist);
1120 1158 if (!result) {
1121 1159 return -1;
1122 1160 }
1123 1161
1124 1162 /* PyObject_IsTrue returns 1 if true, 0 if false, -1 if error,
1125 1163 * same as this function, so we can just return it directly.*/
1126 1164 isfiltered = PyObject_IsTrue(result);
1127 1165 Py_DECREF(result);
1128 1166 return isfiltered;
1129 1167 } else {
1130 1168 return 0;
1131 1169 }
1132 1170 }
1133 1171
/*
 * Mark every rev in 'list' with 'marker' in the phases array and
 * return the smallest such rev (or len+1 when the list is empty).
 * Returns -2 if an iterator could not be obtained.
 */
static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
                                    Py_ssize_t marker, char *phases)
{
	PyObject *iter = NULL;
	PyObject *iter_item = NULL;
	Py_ssize_t min_idx = index_length(self) + 1;
	long iter_item_long;

	if (PyList_GET_SIZE(list) != 0) {
		iter = PyObject_GetIter(list);
		if (iter == NULL)
			return -2;
		while ((iter_item = PyIter_Next(iter)))
		{
			/* NOTE(review): PyInt_AS_LONG does no type check
			 * and iter_item_long is used unvalidated as an
			 * index into phases — callers presumably pass
			 * in-range int revs only; confirm. */
			iter_item_long = PyInt_AS_LONG(iter_item);
			Py_DECREF(iter_item);
			if (iter_item_long < min_idx)
				min_idx = iter_item_long;
			phases[iter_item_long] = marker;
		}
		/* NOTE(review): PyIter_Next returning NULL can also mean
		 * an error (PyErr_Occurred not re-checked here). */
		Py_DECREF(iter);
	}

	return min_idx;
}
1159 1197
1160 1198 static inline void set_phase_from_parents(char *phases, int parent_1,
1161 1199 int parent_2, Py_ssize_t i)
1162 1200 {
1163 1201 if (parent_1 >= 0 && phases[parent_1] > phases[i])
1164 1202 phases[i] = phases[parent_1];
1165 1203 if (parent_2 >= 0 && phases[parent_2] > phases[i])
1166 1204 phases[i] = phases[parent_2];
1167 1205 }
1168 1206
/*
 * reachableroots2(minroot, heads, roots, includepath)
 *
 * Walk ancestors of 'heads' (bounded below by 'minroot') and return
 * the list of members of 'roots' that are reachable.  When
 * 'includepath' is true, also include every rev lying on a path
 * between a reachable root and a head.
 */
static PyObject *reachableroots2(indexObject *self, PyObject *args)
{

	/* Input */
	long minroot;
	PyObject *includepatharg = NULL;
	int includepath = 0;
	/* heads and roots are lists */
	PyObject *heads = NULL;
	PyObject *roots = NULL;
	PyObject *reachable = NULL;

	PyObject *val;
	Py_ssize_t len = index_length(self) - 1;
	long revnum;
	Py_ssize_t k;
	Py_ssize_t i;
	Py_ssize_t l;
	int r;
	int parents[2];

	/* Internal data structure:
	 * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
	 * revstates: array of length len+1 (all revs + nullrev) */
	int *tovisit = NULL;
	long lentovisit = 0;
	enum { RS_SEEN = 1, RS_ROOT = 2, RS_REACHABLE = 4 };
	char *revstates = NULL;

	/* Get arguments */
	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
			      &PyList_Type, &roots,
			      &PyBool_Type, &includepatharg))
		goto bail;

	if (includepatharg == Py_True)
		includepath = 1;

	/* Initialize return set */
	reachable = PyList_New(0);
	if (reachable == NULL)
		goto bail;

	/* Initialize internal datastructures */
	tovisit = (int *)malloc((len + 1) * sizeof(int));
	if (tovisit == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	revstates = (char *)calloc(len + 1, 1);
	if (revstates == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* Mark the given roots; revstates is indexed by rev+1 so that
	 * nullrev (-1) maps to slot 0 */
	l = PyList_GET_SIZE(roots);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		/* If root is out of range, e.g. wdir(), it must be unreachable
		 * from heads. So we can just ignore it. */
		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
			continue;
		revstates[revnum + 1] |= RS_ROOT;
	}

	/* Populate tovisit with all the heads */
	l = PyList_GET_SIZE(heads);
	for (i = 0; i < l; i++) {
		revnum = PyInt_AsLong(PyList_GET_ITEM(heads, i));
		if (revnum == -1 && PyErr_Occurred())
			goto bail;
		if (revnum + 1 < 0 || revnum + 1 >= len + 1) {
			PyErr_SetString(PyExc_IndexError, "head out of range");
			goto bail;
		}
		if (!(revstates[revnum + 1] & RS_SEEN)) {
			tovisit[lentovisit++] = (int)revnum;
			revstates[revnum + 1] |= RS_SEEN;
		}
	}

	/* Visit the tovisit list and find the reachable roots */
	k = 0;
	while (k < lentovisit) {
		/* Add the node to reachable if it is a root*/
		revnum = tovisit[k++];
		if (revstates[revnum + 1] & RS_ROOT) {
			revstates[revnum + 1] |= RS_REACHABLE;
			val = PyInt_FromLong(revnum);
			if (val == NULL)
				goto bail;
			r = PyList_Append(reachable, val);
			Py_DECREF(val);
			if (r < 0)
				goto bail;
			/* without includepath, stop descending at a root */
			if (includepath == 0)
				continue;
		}

		/* Add its parents to the list of nodes to visit */
		if (revnum == -1)
			continue;
		r = index_get_parents(self, revnum, parents, (int)len - 1);
		if (r < 0)
			goto bail;
		for (i = 0; i < 2; i++) {
			if (!(revstates[parents[i] + 1] & RS_SEEN)
			    && parents[i] >= minroot) {
				tovisit[lentovisit++] = parents[i];
				revstates[parents[i] + 1] |= RS_SEEN;
			}
		}
	}

	/* Find all the nodes in between the roots we found and the heads
	 * and add them to the reachable set */
	if (includepath == 1) {
		long minidx = minroot;
		if (minidx < 0)
			minidx = 0;
		/* forward pass works because parents always have smaller
		 * rev numbers than their children */
		for (i = minidx; i < len; i++) {
			if (!(revstates[i + 1] & RS_SEEN))
				continue;
			r = index_get_parents(self, i, parents, (int)len - 1);
			/* Corrupted index file, error is set from
			 * index_get_parents */
			if (r < 0)
				goto bail;
			if (((revstates[parents[0] + 1] |
			      revstates[parents[1] + 1]) & RS_REACHABLE)
			    && !(revstates[i + 1] & RS_REACHABLE)) {
				revstates[i + 1] |= RS_REACHABLE;
				val = PyInt_FromLong(i);
				if (val == NULL)
					goto bail;
				r = PyList_Append(reachable, val);
				Py_DECREF(val);
				if (r < 0)
					goto bail;
			}
		}
	}

	free(revstates);
	free(tovisit);
	return reachable;
bail:
	Py_XDECREF(reachable);
	free(revstates);
	free(tovisit);
	return NULL;
}
1324 1362
/*
 * computephases(roots): given a list of per-phase root-rev lists,
 * propagate phases from roots to descendants and return a 2-tuple
 * (list of per-rev phase numbers, list of per-phase rev sets; slot 0
 * is None because public revs are derived by difference).
 */
static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
{
	PyObject *roots = Py_None;
	PyObject *ret = NULL;
	PyObject *phaseslist = NULL;
	PyObject *phaseroots = NULL;
	PyObject *phaseset = NULL;
	PyObject *phasessetlist = NULL;
	PyObject *rev = NULL;
	Py_ssize_t len = index_length(self) - 1;
	Py_ssize_t numphase = 0;
	Py_ssize_t minrevallphases = 0;
	Py_ssize_t minrevphase = 0;
	Py_ssize_t i = 0;
	char *phases = NULL;
	long phase;

	if (!PyArg_ParseTuple(args, "O", &roots))
		goto done;
	if (roots == NULL || !PyList_Check(roots))
		goto done;

	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
	if (phases == NULL) {
		PyErr_NoMemory();
		goto done;
	}
	/* Put the phase information of all the roots in phases */
	numphase = PyList_GET_SIZE(roots)+1;
	minrevallphases = len + 1;
	phasessetlist = PyList_New(numphase);
	if (phasessetlist == NULL)
		goto done;

	/* slot 0 (public) stays None — public revs are everything else */
	PyList_SET_ITEM(phasessetlist, 0, Py_None);
	Py_INCREF(Py_None);

	for (i = 0; i < numphase-1; i++) {
		phaseroots = PyList_GET_ITEM(roots, i);
		phaseset = PySet_New(NULL);
		if (phaseset == NULL)
			goto release;
		/* reference to phaseset is owned by phasessetlist from
		 * here on (SET_ITEM steals it) */
		PyList_SET_ITEM(phasessetlist, i+1, phaseset);
		if (!PyList_Check(phaseroots))
			goto release;
		minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
		if (minrevphase == -2) /* Error from add_roots_get_min */
			goto release;
		minrevallphases = MIN(minrevallphases, minrevphase);
	}
	/* Propagate the phase information from the roots to the revs */
	if (minrevallphases != -1) {
		int parents[2];
		for (i = minrevallphases; i < len; i++) {
			if (index_get_parents(self, i, parents,
					      (int)len - 1) < 0)
				goto release;
			set_phase_from_parents(phases, parents[0], parents[1], i);
		}
	}
	/* Transform phase list to a python list */
	phaseslist = PyList_New(len);
	if (phaseslist == NULL)
		goto release;
	for (i = 0; i < len; i++) {
		PyObject *phaseval;

		phase = phases[i];
		/* We only store the sets of phase for non public phase, the public phase
		 * is computed as a difference */
		if (phase != 0) {
			phaseset = PyList_GET_ITEM(phasessetlist, phase);
			rev = PyInt_FromLong(i);
			if (rev == NULL)
				goto release;
			PySet_Add(phaseset, rev);
			Py_XDECREF(rev);
		}
		phaseval = PyInt_FromLong(phase);
		if (phaseval == NULL)
			goto release;
		PyList_SET_ITEM(phaseslist, i, phaseval);
	}
	/* PyTuple_Pack takes its own references, so both lists are
	 * released below regardless of success */
	ret = PyTuple_Pack(2, phaseslist, phasessetlist);

	release:
	Py_XDECREF(phaseslist);
	Py_XDECREF(phasessetlist);
	done:
	free(phases);
	return ret;
}
1417 1455
/*
 * headrevs([filteredrevs]): return the list of head revisions — revs
 * with no unfiltered children.  The result is cached on the index and
 * keyed by the filteredrevs object; a defensive copy is returned so
 * callers cannot mutate the cache.
 */
static PyObject *index_headrevs(indexObject *self, PyObject *args)
{
	Py_ssize_t i, j, len;
	char *nothead = NULL;
	PyObject *heads = NULL;
	PyObject *filter = NULL;
	PyObject *filteredrevs = Py_None;

	if (!PyArg_ParseTuple(args, "|O", &filteredrevs)) {
		return NULL;
	}

	/* cache hit: same filter object as last time (identity check) */
	if (self->headrevs && filteredrevs == self->filteredrevs)
		return list_copy(self->headrevs);

	Py_DECREF(self->filteredrevs);
	self->filteredrevs = filteredrevs;
	Py_INCREF(filteredrevs);

	if (filteredrevs != Py_None) {
		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
		if (!filter) {
			PyErr_SetString(PyExc_TypeError,
				"filteredrevs has no attribute __contains__");
			goto bail;
		}
	}

	len = index_length(self) - 1;
	heads = PyList_New(0);
	if (heads == NULL)
		goto bail;
	/* empty repo: the only head is nullrev */
	if (len == 0) {
		PyObject *nullid = PyInt_FromLong(-1);
		if (nullid == NULL || PyList_Append(heads, nullid) == -1) {
			Py_XDECREF(nullid);
			goto bail;
		}
		goto done;
	}

	nothead = calloc(len, 1);
	if (nothead == NULL) {
		PyErr_NoMemory();
		goto bail;
	}

	/* walk from the tip down so children are seen before parents */
	for (i = len - 1; i >= 0; i--) {
		int isfiltered;
		int parents[2];

		/* If nothead[i] == 1, it means we've seen an unfiltered child of this
		 * node already, and therefore this node is not filtered. So we can skip
		 * the expensive check_filter step.
		 */
		if (nothead[i] != 1) {
			isfiltered = check_filter(filter, i);
			if (isfiltered == -1) {
				PyErr_SetString(PyExc_TypeError,
					"unable to check filter");
				goto bail;
			}

			if (isfiltered) {
				nothead[i] = 1;
				continue;
			}
		}

		if (index_get_parents(self, i, parents, (int)len - 1) < 0)
			goto bail;
		for (j = 0; j < 2; j++) {
			if (parents[j] >= 0)
				nothead[parents[j]] = 1;
		}
	}

	/* everything not marked as a non-head is a head */
	for (i = 0; i < len; i++) {
		PyObject *head;

		if (nothead[i])
			continue;
		head = PyInt_FromSsize_t(i);
		if (head == NULL || PyList_Append(heads, head) == -1) {
			Py_XDECREF(head);
			goto bail;
		}
	}

done:
	self->headrevs = heads;
	Py_XDECREF(filter);
	free(nothead);
	return list_copy(self->headrevs);
bail:
	Py_XDECREF(filter);
	Py_XDECREF(heads);
	free(nothead);
	return NULL;
}
1518 1556
1519 1557 static inline int nt_level(const char *node, Py_ssize_t level)
1520 1558 {
1521 1559 int v = node[level>>1];
1522 1560 if (!(level & 1))
1523 1561 v >>= 4;
1524 1562 return v & 0xf;
1525 1563 }
1526 1564
/*
 * Look up a (possibly partial) node in the trie.
 *
 * Return values:
 *
 *   -4: match is ambiguous (multiple candidates)
 *   -2: not found
 * rest: valid rev
 *
 * 'hex' selects whether 'node' is a hex string or binary hash; a full
 * binary nullid returns -1 (nullrev).
 */
static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen,
		   int hex)
{
	/* pick the per-level digit extractor matching the input encoding */
	int (*getnybble)(const char *, Py_ssize_t) = hex ? hexdigit : nt_level;
	int level, maxlevel, off;

	/* node[0] check is a cheap pre-test before the full memcmp */
	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
		return -1;

	if (self->nt == NULL)
		return -2;

	/* maxlevel is measured in nybbles: at most 40 for a 20-byte hash */
	if (hex)
		maxlevel = nodelen > 40 ? 40 : (int)nodelen;
	else
		maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2);

	for (level = off = 0; level < maxlevel; level++) {
		int k = getnybble(node, level);
		nodetree *n = &self->nt[off];
		int v = n->children[k];

		if (v < 0) {
			/* leaf: decode the rev and verify the remaining
			 * prefix digits against the stored full hash */
			const char *n;
			Py_ssize_t i;

			v = -(v + 1);
			n = index_node(self, v);
			if (n == NULL)
				return -2;
			for (i = level; i < maxlevel; i++)
				if (getnybble(node, i) != nt_level(n, i))
					return -2;
			return v;
		}
		if (v == 0)
			return -2;
		/* interior node: descend */
		off = v;
	}
	/* multiple matches against an ambiguous prefix */
	return -4;
}
1576 1614
1577 1615 static int nt_new(indexObject *self)
1578 1616 {
1579 1617 if (self->ntlength == self->ntcapacity) {
1580 1618 if (self->ntcapacity >= INT_MAX / (sizeof(nodetree) * 2)) {
1581 1619 PyErr_SetString(PyExc_MemoryError,
1582 1620 "overflow in nt_new");
1583 1621 return -1;
1584 1622 }
1585 1623 self->ntcapacity *= 2;
1586 1624 self->nt = realloc(self->nt,
1587 1625 self->ntcapacity * sizeof(nodetree));
1588 1626 if (self->nt == NULL) {
1589 1627 PyErr_SetString(PyExc_MemoryError, "out of memory");
1590 1628 return -1;
1591 1629 }
1592 1630 memset(&self->nt[self->ntlength], 0,
1593 1631 sizeof(nodetree) * (self->ntcapacity - self->ntlength));
1594 1632 }
1595 1633 return self->ntlength++;
1596 1634 }
1597 1635
/*
 * Insert a full (20-byte) node -> rev mapping into the radix tree,
 * splitting leaf entries into interior nodes on collision.
 * rev == -1 invalidates an entry. Returns 0 on success, -1 on error.
 */
static int nt_insert(indexObject *self, const char *node, int rev)
{
	int level = 0;
	int off = 0;

	while (level < 40) {
		int k = nt_level(node, level);
		nodetree *n;
		int v;

		n = &self->nt[off];
		v = n->children[k];

		if (v == 0) {
			/* empty slot: store rev as a leaf, -(rev + 1) */
			n->children[k] = -rev - 1;
			return 0;
		}
		if (v < 0) {
			/* occupied leaf: overwrite if it is the same node
			 * (or a stale entry), otherwise split */
			const char *oldnode = index_node(self, -(v + 1));
			int noff;

			if (!oldnode || !memcmp(oldnode, node, 20)) {
				n->children[k] = -rev - 1;
				return 0;
			}
			noff = nt_new(self);
			if (noff == -1)
				return -1;
			/* self->nt may have been changed by realloc */
			self->nt[off].children[k] = noff;
			off = noff;
			n = &self->nt[off];
			/* push the displaced leaf one level down */
			n->children[nt_level(oldnode, ++level)] = v;
			if (level > self->ntdepth)
				self->ntdepth = level;
			self->ntsplits += 1;
		} else {
			/* interior node: descend */
			level += 1;
			off = v;
		}
	}

	return -1;
}
1642 1680
/*
 * Lazily allocate the radix tree and seed it with the nullid
 * sentinel. Returns 0 on success, -1 on error (exception set).
 */
static int nt_init(indexObject *self)
{
	if (self->nt == NULL) {
		if ((size_t)self->raw_length > INT_MAX / sizeof(nodetree)) {
			PyErr_SetString(PyExc_ValueError, "overflow in nt_init");
			return -1;
		}
		/* start at half the number of on-disk revs, minimum 4 */
		self->ntcapacity = self->raw_length < 4
			? 4 : (int)self->raw_length / 2;

		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
		if (self->nt == NULL) {
			PyErr_NoMemory();
			return -1;
		}
		self->ntlength = 1;
		self->ntrev = (int)index_length(self) - 1;
		self->ntlookups = 1;
		self->ntmisses = 0;
		/* rev INT_MAX marks the nullid sentinel entry */
		if (nt_insert(self, nullid, INT_MAX) == -1)
			return -1;
	}
	return 0;
}
1667 1705
/*
 * Map a binary node id (possibly a prefix) to a revision number,
 * lazily populating the radix tree from the index as we scan.
 *
 * Return values:
 *
 * -3: error (exception set)
 * -2: not found (no exception set)
 * rest: valid rev
 */
static int index_find_node(indexObject *self,
			   const char *node, Py_ssize_t nodelen)
{
	int rev;

	self->ntlookups++;
	rev = nt_find(self, node, nodelen, 0);
	if (rev >= -1)
		return rev;

	if (nt_init(self) == -1)
		return -3;

	/*
	 * For the first handful of lookups, we scan the entire index,
	 * and cache only the matching nodes. This optimizes for cases
	 * like "hg tip", where only a few nodes are accessed.
	 *
	 * After that, we cache every node we visit, using a single
	 * scan amortized over multiple lookups. This gives the best
	 * bulk performance, e.g. for "hg log".
	 */
	if (self->ntmisses++ < 4) {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL)
				return -2;
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				if (nt_insert(self, n, rev) == -1)
					return -3;
				break;
			}
		}
	} else {
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL) {
				/* remember how far we got for next time */
				self->ntrev = rev + 1;
				return -2;
			}
			if (nt_insert(self, n, rev) == -1) {
				self->ntrev = rev + 1;
				return -3;
			}
			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
				break;
			}
		}
		self->ntrev = rev;
	}

	if (rev >= 0)
		return rev;
	return -2;
}
1730 1768
/*
 * Set a mercurial.error.RevlogError exception, or a SystemError
 * (or whatever the import machinery raised) if that class cannot
 * be found.
 */
static void raise_revlog_error(void)
{
	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;

	mod = PyImport_ImportModule("mercurial.error");
	if (mod == NULL) {
		goto cleanup;
	}

	/* PyModule_GetDict returns a borrowed reference; take our own
	 * so the unconditional XDECREF below is balanced */
	dict = PyModule_GetDict(mod);
	if (dict == NULL) {
		goto cleanup;
	}
	Py_INCREF(dict);

	/* borrowed reference -- no INCREF needed for PyErr_SetString */
	errclass = PyDict_GetItemString(dict, "RevlogError");
	if (errclass == NULL) {
		PyErr_SetString(PyExc_SystemError,
				"could not find RevlogError");
		goto cleanup;
	}

	/* value of exception is ignored by callers */
	PyErr_SetString(errclass, "RevlogError");

cleanup:
	Py_XDECREF(dict);
	Py_XDECREF(mod);
}
1760 1798
/*
 * __getitem__: an int argument is treated as a revision number,
 * anything else as a binary node id to resolve to a rev.
 * Raises RevlogError for unknown nodes.
 */
static PyObject *index_getitem(indexObject *self, PyObject *value)
{
	char *node;
	Py_ssize_t nodelen;
	int rev;

	if (PyInt_Check(value))
		return index_get(self, PyInt_AS_LONG(value));

	if (node_check(value, &node, &nodelen) == -1)
		return NULL;
	rev = index_find_node(self, node, nodelen);
	if (rev >= -1)
		return PyInt_FromLong(rev);
	if (rev == -2)
		raise_revlog_error();
	/* rev == -3: exception already set by index_find_node */
	return NULL;
}
1779 1817
/*
 * Resolve a hex node prefix to a rev via the radix tree, after
 * making sure every known rev has been inserted into the tree.
 *
 * Returns the values of nt_find(), plus -3 on error.
 */
static int nt_partialmatch(indexObject *self, const char *node,
			   Py_ssize_t nodelen)
{
	int rev;

	if (nt_init(self) == -1)
		return -3;

	if (self->ntrev > 0) {
		/* ensure that the radix tree is fully populated */
		for (rev = self->ntrev - 1; rev >= 0; rev--) {
			const char *n = index_node(self, rev);
			if (n == NULL)
				return -2;
			if (nt_insert(self, n, rev) == -1)
				return -3;
		}
		self->ntrev = rev;
	}

	return nt_find(self, node, nodelen, 1);
}
1802 1840
/*
 * partialmatch(hexprefix) -> 20-byte binary node or None.
 *
 * Raises ValueError for prefixes shorter than 4 or longer than 40
 * hex digits, returns None for non-hex input or no match, and raises
 * RevlogError when the prefix is ambiguous.
 */
static PyObject *index_partialmatch(indexObject *self, PyObject *args)
{
	const char *fullnode;
	int nodelen;
	char *node;
	int rev, i;

	if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
		return NULL;

	if (nodelen < 4) {
		PyErr_SetString(PyExc_ValueError, "key too short");
		return NULL;
	}

	if (nodelen > 40) {
		PyErr_SetString(PyExc_ValueError, "key too long");
		return NULL;
	}

	for (i = 0; i < nodelen; i++)
		hexdigit(node, i);
	if (PyErr_Occurred()) {
		/* input contains non-hex characters */
		PyErr_Clear();
		Py_RETURN_NONE;
	}

	rev = nt_partialmatch(self, node, nodelen);

	switch (rev) {
	case -4:
		raise_revlog_error();
		/* fall through: return NULL with the exception set */
	case -3:
		return NULL;
	case -2:
		Py_RETURN_NONE;
	case -1:
		/* prefix of nullid */
		return PyBytes_FromStringAndSize(nullid, 20);
	}

	fullnode = index_node(self, rev);
	if (fullnode == NULL) {
		PyErr_Format(PyExc_IndexError,
			     "could not access rev %d", rev);
		return NULL;
	}
	return PyBytes_FromStringAndSize(fullnode, 20);
}
1852 1890
/*
 * get(node) -> rev: like __getitem__, but returns None rather than
 * raising when the node is not present.
 */
static PyObject *index_m_get(indexObject *self, PyObject *args)
{
	Py_ssize_t nodelen;
	PyObject *val;
	char *node;
	int rev;

	if (!PyArg_ParseTuple(args, "O", &val))
		return NULL;
	if (node_check(val, &node, &nodelen) == -1)
		return NULL;
	rev = index_find_node(self, node, nodelen);
	if (rev == -3)
		return NULL;
	if (rev == -2)
		Py_RETURN_NONE;
	return PyInt_FromLong(rev);
}
1871 1909
/*
 * __contains__: an int is contained if it is an in-range rev
 * (nullrev -1 included); any other value is looked up as a node id.
 * Returns 1/0, or -1 on error.
 */
static int index_contains(indexObject *self, PyObject *value)
{
	char *node;
	Py_ssize_t nodelen;

	if (PyInt_Check(value)) {
		long rev = PyInt_AS_LONG(value);
		return rev >= -1 && rev < index_length(self);
	}

	if (node_check(value, &node, &nodelen) == -1)
		return -1;

	switch (index_find_node(self, node, nodelen)) {
	case -3:
		return -1;
	case -2:
		return 0;
	default:
		return 1;
	}
}
1894 1932
1895 1933 typedef uint64_t bitmask;
1896 1934
1897 1935 /*
1898 1936 * Given a disjoint set of revs, return all candidates for the
1899 1937 * greatest common ancestor. In revset notation, this is the set
1900 1938 * "heads(::a and ::b and ...)"
1901 1939 */
1902 1940 static PyObject *find_gca_candidates(indexObject *self, const int *revs,
1903 1941 int revcount)
1904 1942 {
1905 1943 const bitmask allseen = (1ull << revcount) - 1;
1906 1944 const bitmask poison = 1ull << revcount;
1907 1945 PyObject *gca = PyList_New(0);
1908 1946 int i, v, interesting;
1909 1947 int maxrev = -1;
1910 1948 bitmask sp;
1911 1949 bitmask *seen;
1912 1950
1913 1951 if (gca == NULL)
1914 1952 return PyErr_NoMemory();
1915 1953
1916 1954 for (i = 0; i < revcount; i++) {
1917 1955 if (revs[i] > maxrev)
1918 1956 maxrev = revs[i];
1919 1957 }
1920 1958
1921 1959 seen = calloc(sizeof(*seen), maxrev + 1);
1922 1960 if (seen == NULL) {
1923 1961 Py_DECREF(gca);
1924 1962 return PyErr_NoMemory();
1925 1963 }
1926 1964
1927 1965 for (i = 0; i < revcount; i++)
1928 1966 seen[revs[i]] = 1ull << i;
1929 1967
1930 1968 interesting = revcount;
1931 1969
1932 1970 for (v = maxrev; v >= 0 && interesting; v--) {
1933 1971 bitmask sv = seen[v];
1934 1972 int parents[2];
1935 1973
1936 1974 if (!sv)
1937 1975 continue;
1938 1976
1939 1977 if (sv < poison) {
1940 1978 interesting -= 1;
1941 1979 if (sv == allseen) {
1942 1980 PyObject *obj = PyInt_FromLong(v);
1943 1981 if (obj == NULL)
1944 1982 goto bail;
1945 1983 if (PyList_Append(gca, obj) == -1) {
1946 1984 Py_DECREF(obj);
1947 1985 goto bail;
1948 1986 }
1949 1987 sv |= poison;
1950 1988 for (i = 0; i < revcount; i++) {
1951 1989 if (revs[i] == v)
1952 1990 goto done;
1953 1991 }
1954 1992 }
1955 1993 }
1956 1994 if (index_get_parents(self, v, parents, maxrev) < 0)
1957 1995 goto bail;
1958 1996
1959 1997 for (i = 0; i < 2; i++) {
1960 1998 int p = parents[i];
1961 1999 if (p == -1)
1962 2000 continue;
1963 2001 sp = seen[p];
1964 2002 if (sv < poison) {
1965 2003 if (sp == 0) {
1966 2004 seen[p] = sv;
1967 2005 interesting++;
1968 2006 }
1969 2007 else if (sp != sv)
1970 2008 seen[p] |= sv;
1971 2009 } else {
1972 2010 if (sp && sp < poison)
1973 2011 interesting--;
1974 2012 seen[p] = sv;
1975 2013 }
1976 2014 }
1977 2015 }
1978 2016
1979 2017 done:
1980 2018 free(seen);
1981 2019 return gca;
1982 2020 bail:
1983 2021 free(seen);
1984 2022 Py_XDECREF(gca);
1985 2023 return NULL;
1986 2024 }
1987 2025
1988 2026 /*
1989 2027 * Given a disjoint set of revs, return the subset with the longest
1990 2028 * path to the root.
1991 2029 */
1992 2030 static PyObject *find_deepest(indexObject *self, PyObject *revs)
1993 2031 {
1994 2032 const Py_ssize_t revcount = PyList_GET_SIZE(revs);
1995 2033 static const Py_ssize_t capacity = 24;
1996 2034 int *depth, *interesting = NULL;
1997 2035 int i, j, v, ninteresting;
1998 2036 PyObject *dict = NULL, *keys = NULL;
1999 2037 long *seen = NULL;
2000 2038 int maxrev = -1;
2001 2039 long final;
2002 2040
2003 2041 if (revcount > capacity) {
2004 2042 PyErr_Format(PyExc_OverflowError,
2005 2043 "bitset size (%ld) > capacity (%ld)",
2006 2044 (long)revcount, (long)capacity);
2007 2045 return NULL;
2008 2046 }
2009 2047
2010 2048 for (i = 0; i < revcount; i++) {
2011 2049 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2012 2050 if (n > maxrev)
2013 2051 maxrev = n;
2014 2052 }
2015 2053
2016 2054 depth = calloc(sizeof(*depth), maxrev + 1);
2017 2055 if (depth == NULL)
2018 2056 return PyErr_NoMemory();
2019 2057
2020 2058 seen = calloc(sizeof(*seen), maxrev + 1);
2021 2059 if (seen == NULL) {
2022 2060 PyErr_NoMemory();
2023 2061 goto bail;
2024 2062 }
2025 2063
2026 2064 interesting = calloc(sizeof(*interesting), 2 << revcount);
2027 2065 if (interesting == NULL) {
2028 2066 PyErr_NoMemory();
2029 2067 goto bail;
2030 2068 }
2031 2069
2032 2070 if (PyList_Sort(revs) == -1)
2033 2071 goto bail;
2034 2072
2035 2073 for (i = 0; i < revcount; i++) {
2036 2074 int n = (int)PyInt_AsLong(PyList_GET_ITEM(revs, i));
2037 2075 long b = 1l << i;
2038 2076 depth[n] = 1;
2039 2077 seen[n] = b;
2040 2078 interesting[b] = 1;
2041 2079 }
2042 2080
2043 2081 ninteresting = (int)revcount;
2044 2082
2045 2083 for (v = maxrev; v >= 0 && ninteresting > 1; v--) {
2046 2084 int dv = depth[v];
2047 2085 int parents[2];
2048 2086 long sv;
2049 2087
2050 2088 if (dv == 0)
2051 2089 continue;
2052 2090
2053 2091 sv = seen[v];
2054 2092 if (index_get_parents(self, v, parents, maxrev) < 0)
2055 2093 goto bail;
2056 2094
2057 2095 for (i = 0; i < 2; i++) {
2058 2096 int p = parents[i];
2059 2097 long sp;
2060 2098 int dp;
2061 2099
2062 2100 if (p == -1)
2063 2101 continue;
2064 2102
2065 2103 dp = depth[p];
2066 2104 sp = seen[p];
2067 2105 if (dp <= dv) {
2068 2106 depth[p] = dv + 1;
2069 2107 if (sp != sv) {
2070 2108 interesting[sv] += 1;
2071 2109 seen[p] = sv;
2072 2110 if (sp) {
2073 2111 interesting[sp] -= 1;
2074 2112 if (interesting[sp] == 0)
2075 2113 ninteresting -= 1;
2076 2114 }
2077 2115 }
2078 2116 }
2079 2117 else if (dv == dp - 1) {
2080 2118 long nsp = sp | sv;
2081 2119 if (nsp == sp)
2082 2120 continue;
2083 2121 seen[p] = nsp;
2084 2122 interesting[sp] -= 1;
2085 2123 if (interesting[sp] == 0 && interesting[nsp] > 0)
2086 2124 ninteresting -= 1;
2087 2125 interesting[nsp] += 1;
2088 2126 }
2089 2127 }
2090 2128 interesting[sv] -= 1;
2091 2129 if (interesting[sv] == 0)
2092 2130 ninteresting -= 1;
2093 2131 }
2094 2132
2095 2133 final = 0;
2096 2134 j = ninteresting;
2097 2135 for (i = 0; i < (int)(2 << revcount) && j > 0; i++) {
2098 2136 if (interesting[i] == 0)
2099 2137 continue;
2100 2138 final |= i;
2101 2139 j -= 1;
2102 2140 }
2103 2141 if (final == 0) {
2104 2142 keys = PyList_New(0);
2105 2143 goto bail;
2106 2144 }
2107 2145
2108 2146 dict = PyDict_New();
2109 2147 if (dict == NULL)
2110 2148 goto bail;
2111 2149
2112 2150 for (i = 0; i < revcount; i++) {
2113 2151 PyObject *key;
2114 2152
2115 2153 if ((final & (1 << i)) == 0)
2116 2154 continue;
2117 2155
2118 2156 key = PyList_GET_ITEM(revs, i);
2119 2157 Py_INCREF(key);
2120 2158 Py_INCREF(Py_None);
2121 2159 if (PyDict_SetItem(dict, key, Py_None) == -1) {
2122 2160 Py_DECREF(key);
2123 2161 Py_DECREF(Py_None);
2124 2162 goto bail;
2125 2163 }
2126 2164 }
2127 2165
2128 2166 keys = PyDict_Keys(dict);
2129 2167
2130 2168 bail:
2131 2169 free(depth);
2132 2170 free(seen);
2133 2171 free(interesting);
2134 2172 Py_XDECREF(dict);
2135 2173
2136 2174 return keys;
2137 2175 }
2138 2176
/*
 * Given a (possibly overlapping) set of revs, return all the
 * common ancestors heads: heads(::args[0] and ::a[1] and ...)
 */
static PyObject *index_commonancestorsheads(indexObject *self, PyObject *args)
{
	PyObject *ret = NULL;
	Py_ssize_t argcount, i, len;
	bitmask repeat = 0;
	int revcount = 0;
	int *revs;

	argcount = PySequence_Length(args);
	revs = malloc(argcount * sizeof(*revs));
	if (argcount > 0 && revs == NULL)
		return PyErr_NoMemory();
	/* exclude the nullid sentinel from the valid range */
	len = index_length(self) - 1;

	for (i = 0; i < argcount; i++) {
		static const int capacity = 24;
		PyObject *obj = PySequence_GetItem(args, i);
		bitmask x;
		long val;

		if (!PyInt_Check(obj)) {
			PyErr_SetString(PyExc_TypeError,
					"arguments must all be ints");
			Py_DECREF(obj);
			goto bail;
		}
		val = PyInt_AsLong(obj);
		Py_DECREF(obj);
		if (val == -1) {
			/* nullrev short-circuits: its gca set is empty */
			ret = PyList_New(0);
			goto done;
		}
		if (val < 0 || val >= len) {
			PyErr_SetString(PyExc_IndexError,
					"index out of range");
			goto bail;
		}
		/* this cheesy bloom filter lets us avoid some more
		 * expensive duplicate checks in the common set-is-disjoint
		 * case */
		x = 1ull << (val & 0x3f);
		if (repeat & x) {
			int k;
			for (k = 0; k < revcount; k++) {
				if (val == revs[k])
					goto duplicate;
			}
		}
		else repeat |= x;
		/* find_gca_candidates only supports 24 input revs */
		if (revcount >= capacity) {
			PyErr_Format(PyExc_OverflowError,
				     "bitset size (%d) > capacity (%d)",
				     revcount, capacity);
			goto bail;
		}
		revs[revcount++] = (int)val;
	duplicate:;
	}

	if (revcount == 0) {
		ret = PyList_New(0);
		goto done;
	}
	if (revcount == 1) {
		/* a single rev is trivially its own gca head */
		PyObject *obj;
		ret = PyList_New(1);
		if (ret == NULL)
			goto bail;
		obj = PyInt_FromLong(revs[0]);
		if (obj == NULL)
			goto bail;
		PyList_SET_ITEM(ret, 0, obj);
		goto done;
	}

	ret = find_gca_candidates(self, revs, revcount);
	if (ret == NULL)
		goto bail;

done:
	free(revs);
	return ret;

bail:
	free(revs);
	Py_XDECREF(ret);
	return NULL;
}
2231 2269
2232 2270 /*
2233 2271 * Given a (possibly overlapping) set of revs, return the greatest
2234 2272 * common ancestors: those with the longest path to the root.
2235 2273 */
2236 2274 static PyObject *index_ancestors(indexObject *self, PyObject *args)
2237 2275 {
2238 2276 PyObject *ret;
2239 2277 PyObject *gca = index_commonancestorsheads(self, args);
2240 2278 if (gca == NULL)
2241 2279 return NULL;
2242 2280
2243 2281 if (PyList_GET_SIZE(gca) <= 1) {
2244 2282 return gca;
2245 2283 }
2246 2284
2247 2285 ret = find_deepest(self, gca);
2248 2286 Py_DECREF(gca);
2249 2287 return ret;
2250 2288 }
2251 2289
/*
 * Invalidate any trie entries introduced by added revs.
 */
static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
{
	Py_ssize_t i, len = PyList_GET_SIZE(self->added);

	for (i = start; i < len; i++) {
		PyObject *tuple = PyList_GET_ITEM(self->added, i);
		/* element 7 of an index entry tuple is the node id */
		PyObject *node = PyTuple_GET_ITEM(tuple, 7);

		/* rev -1 marks the trie slot as invalid */
		nt_insert(self, PyBytes_AS_STRING(node), -1);
	}

	/* invalidating everything: the added list itself can go */
	if (start == 0)
		Py_CLEAR(self->added);
}
2269 2307
/*
 * Delete a numeric range of revs, which must be at the end of the
 * range, but exclude the sentinel nullid entry.
 */
static int index_slice_del(indexObject *self, PyObject *item)
{
	Py_ssize_t start, stop, step, slicelength;
	Py_ssize_t length = index_length(self);
	int ret = 0;

	/* Argument changed from PySliceObject* to PyObject* in Python 3. */
#ifdef IS_PY3K
	if (PySlice_GetIndicesEx(item, length,
#else
	if (PySlice_GetIndicesEx((PySliceObject*)item, length,
#endif
				 &start, &stop, &step, &slicelength) < 0)
		return -1;

	if (slicelength <= 0)
		return 0;

	if ((step < 0 && start < stop) || (step > 0 && start > stop))
		stop = start;

	/* normalize a backwards slice into a forward one */
	if (step < 0) {
		stop = start + 1;
		start = stop + step*(slicelength - 1) - 1;
		step = -step;
	}

	if (step != 1) {
		PyErr_SetString(PyExc_ValueError,
				"revlog index delete requires step size of 1");
		return -1;
	}

	/* only truncation up to (but excluding) nullid is supported */
	if (stop != length - 1) {
		PyErr_SetString(PyExc_IndexError,
				"revlog index deletion indices are invalid");
		return -1;
	}

	if (start < self->length - 1) {
		/* truncation reaches into the on-disk portion */
		if (self->nt) {
			Py_ssize_t i;

			/* drop trie entries for all truncated revs */
			for (i = start + 1; i < self->length - 1; i++) {
				const char *node = index_node(self, i);

				if (node)
					nt_insert(self, node, -1);
			}
			if (self->added)
				nt_invalidate_added(self, 0);
			if (self->ntrev > start)
				self->ntrev = (int)start;
		}
		self->length = start + 1;
		if (start < self->raw_length) {
			/* release cached entry tuples past the cut */
			if (self->cache) {
				Py_ssize_t i;
				for (i = start; i < self->raw_length; i++)
					Py_CLEAR(self->cache[i]);
			}
			self->raw_length = start;
		}
		goto done;
	}

	/* truncation only affects revs appended in memory */
	if (self->nt) {
		nt_invalidate_added(self, start - self->length + 1);
		if (self->ntrev > start)
			self->ntrev = (int)start;
	}
	if (self->added)
		ret = PyList_SetSlice(self->added, start - self->length + 1,
				      PyList_GET_SIZE(self->added), NULL);
done:
	/* cached head list is stale after any deletion */
	Py_CLEAR(self->headrevs);
	return ret;
}
2352 2390
2353 2391 /*
2354 2392 * Supported ops:
2355 2393 *
2356 2394 * slice deletion
2357 2395 * string assignment (extend node->rev mapping)
2358 2396 * string deletion (shrink node->rev mapping)
2359 2397 */
2360 2398 static int index_assign_subscript(indexObject *self, PyObject *item,
2361 2399 PyObject *value)
2362 2400 {
2363 2401 char *node;
2364 2402 Py_ssize_t nodelen;
2365 2403 long rev;
2366 2404
2367 2405 if (PySlice_Check(item) && value == NULL)
2368 2406 return index_slice_del(self, item);
2369 2407
2370 2408 if (node_check(item, &node, &nodelen) == -1)
2371 2409 return -1;
2372 2410
2373 2411 if (value == NULL)
2374 2412 return self->nt ? nt_insert(self, node, -1) : 0;
2375 2413 rev = PyInt_AsLong(value);
2376 2414 if (rev > INT_MAX || rev < 0) {
2377 2415 if (!PyErr_Occurred())
2378 2416 PyErr_SetString(PyExc_ValueError, "rev out of range");
2379 2417 return -1;
2380 2418 }
2381 2419
2382 2420 if (nt_init(self) == -1)
2383 2421 return -1;
2384 2422 return nt_insert(self, node, (int)rev);
2385 2423 }
2386 2424
/*
 * Find all RevlogNG entries in an index that has inline data. Update
 * the optional "offsets" table with those entries.
 * Returns the number of entries, or -1 on corrupt data.
 */
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
	const char *data = (const char *)self->buf.buf;
	Py_ssize_t pos = 0;
	Py_ssize_t end = self->buf.len;
	long incr = v1_hdrsize;
	Py_ssize_t len = 0;

	while (pos + v1_hdrsize <= end && pos >= 0) {
		uint32_t comp_len;
		/* 3rd element of header is length of compressed inline data */
		comp_len = getbe32(data + pos + 8);
		incr = v1_hdrsize + comp_len;
		if (offsets)
			offsets[len] = data + pos;
		len++;
		pos += incr;
	}

	/* not landing exactly on the end means truncated/corrupt data */
	if (pos != end) {
		if (!PyErr_Occurred())
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
		return -1;
	}

	return len;
}
2418 2456
/*
 * __init__(data, inlined): wrap a buffer of RevlogNG index data.
 * Returns 0 on success, -1 on error (exception set).
 */
static int index_init(indexObject *self, PyObject *args)
{
	PyObject *data_obj, *inlined_obj;
	Py_ssize_t size;

	/* Initialize before argument-checking to avoid index_dealloc() crash. */
	self->raw_length = 0;
	self->added = NULL;
	self->cache = NULL;
	self->data = NULL;
	memset(&self->buf, 0, sizeof(self->buf));
	self->headrevs = NULL;
	self->filteredrevs = Py_None;
	Py_INCREF(Py_None);
	self->nt = NULL;
	self->offsets = NULL;

	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
		return -1;
	if (!PyObject_CheckBuffer(data_obj)) {
		PyErr_SetString(PyExc_TypeError,
				"data does not support buffer interface");
		return -1;
	}

	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
		return -1;
	size = self->buf.len;

	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
	self->data = data_obj;

	self->ntlength = self->ntcapacity = 0;
	self->ntdepth = self->ntsplits = 0;
	self->ntlookups = self->ntmisses = 0;
	self->ntrev = -1;
	Py_INCREF(self->data);

	if (self->inlined) {
		/* inline index: entry count requires a full scan */
		Py_ssize_t len = inline_scan(self, NULL);
		if (len == -1)
			goto bail;
		self->raw_length = len;
		/* +1 accounts for the nullid sentinel */
		self->length = len + 1;
	} else {
		if (size % v1_hdrsize) {
			PyErr_SetString(PyExc_ValueError, "corrupt index file");
			goto bail;
		}
		self->raw_length = size / v1_hdrsize;
		self->length = self->raw_length + 1;
	}

	return 0;
bail:
	return -1;
}
2476 2514
/*
 * The index doubles as its own nodemap: node lookups go through the
 * mapping protocol on the index itself, so just return self.
 */
static PyObject *index_nodemap(indexObject *self)
{
	Py_INCREF(self);
	return (PyObject *)self;
}
2482 2520
/*
 * Release all resources held by the index: caches, the borrowed
 * buffer view, and owned object references.
 */
static void index_dealloc(indexObject *self)
{
	_index_clearcaches(self);
	Py_XDECREF(self->filteredrevs);
	if (self->buf.buf) {
		PyBuffer_Release(&self->buf);
		memset(&self->buf, 0, sizeof(self->buf));
	}
	Py_XDECREF(self->data);
	Py_XDECREF(self->added);
	PyObject_Del(self);
}
2495 2533
/* sequence protocol: len(index), index[rev], rev/node in index */
static PySequenceMethods index_sequence_methods = {
	(lenfunc)index_length,	   /* sq_length */
	0,			   /* sq_concat */
	0,			   /* sq_repeat */
	(ssizeargfunc)index_get,   /* sq_item */
	0,			   /* sq_slice */
	0,			   /* sq_ass_item */
	0,			   /* sq_ass_slice */
	(objobjproc)index_contains, /* sq_contains */
};

/* mapping protocol: index[node], slice deletion, node assignment */
static PyMappingMethods index_mapping_methods = {
	(lenfunc)index_length,		 /* mp_length */
	(binaryfunc)index_getitem,	 /* mp_subscript */
	(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
};

/* methods exposed on parsers.index instances */
static PyMethodDef index_methods[] = {
	{"ancestors", (PyCFunction)index_ancestors, METH_VARARGS,
	 "return the gca set of the given revs"},
	{"commonancestorsheads", (PyCFunction)index_commonancestorsheads,
	  METH_VARARGS,
	  "return the heads of the common ancestors of the given revs"},
	{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
	 "clear the index caches"},
	{"get", (PyCFunction)index_m_get, METH_VARARGS,
	 "get an index entry"},
	{"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
			METH_VARARGS, "compute phases"},
	{"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
		"reachableroots"},
	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
	 "get head revisions"}, /* Can do filtering since 3.2 */
	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
	 "get filtered head revisions"}, /* Can always do filtering */
	{"insert", (PyCFunction)index_insert, METH_VARARGS,
	 "insert an index entry"},
	{"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
	 "match a potentially ambiguous node ID"},
	{"stats", (PyCFunction)index_stats, METH_NOARGS,
	 "stats for the index"},
	{NULL} /* Sentinel */
};

/* attributes: "nodemap" hands back the index object itself */
static PyGetSetDef index_getset[] = {
	{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
	{NULL} /* Sentinel */
};
2544 2582
/*
 * Type object for parsers.index, wiring up the sequence and mapping
 * protocols and the methods/getset tables defined above.
 */
static PyTypeObject indexType = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"parsers.index",	   /* tp_name */
	sizeof(indexObject),	   /* tp_basicsize */
	0,			   /* tp_itemsize */
	(destructor)index_dealloc, /* tp_dealloc */
	0,			   /* tp_print */
	0,			   /* tp_getattr */
	0,			   /* tp_setattr */
	0,			   /* tp_compare */
	0,			   /* tp_repr */
	0,			   /* tp_as_number */
	&index_sequence_methods,   /* tp_as_sequence */
	&index_mapping_methods,	   /* tp_as_mapping */
	0,			   /* tp_hash */
	0,			   /* tp_call */
	0,			   /* tp_str */
	0,			   /* tp_getattro */
	0,			   /* tp_setattro */
	0,			   /* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,	   /* tp_flags */
	"revlog index",		   /* tp_doc */
	0,			   /* tp_traverse */
	0,			   /* tp_clear */
	0,			   /* tp_richcompare */
	0,			   /* tp_weaklistoffset */
	0,			   /* tp_iter */
	0,			   /* tp_iternext */
	index_methods,		   /* tp_methods */
	0,			   /* tp_members */
	index_getset,		   /* tp_getset */
	0,			   /* tp_base */
	0,			   /* tp_dict */
	0,			   /* tp_descr_get */
	0,			   /* tp_descr_set */
	0,			   /* tp_dictoffset */
	(initproc)index_init,	   /* tp_init */
	0,			   /* tp_alloc */
};
2584 2622
/*
 * returns a tuple of the form (index, cache) with elements as
 * follows:
 *
 * index: an index object that lazily parses RevlogNG records
 * cache: if data is inlined, a tuple (0, index_file_content), else None
 *	  index_file_content could be a string, or a buffer
 *
 * added complications are for backwards compatibility
 */
static PyObject *parse_index2(PyObject *self, PyObject *args)
{
	PyObject *tuple = NULL, *cache = NULL;
	indexObject *idx;
	int ret;

	idx = PyObject_New(indexObject, &indexType);
	if (idx == NULL)
		goto bail;

	ret = index_init(idx, args);
	if (ret == -1)
		goto bail;

	if (idx->inlined) {
		cache = Py_BuildValue("iO", 0, idx->data);
		if (cache == NULL)
			goto bail;
	} else {
		cache = Py_None;
		Py_INCREF(cache);
	}

	/* "NN" steals both references on success */
	tuple = Py_BuildValue("NN", idx, cache);
	if (!tuple)
		goto bail;
	return tuple;

bail:
	Py_XDECREF(idx);
	Py_XDECREF(cache);
	Py_XDECREF(tuple);
	return NULL;
}
2629 2667
/* flag bits and fixed header size for format-1 obsolescence markers;
 * NOTE(review): presumably these mirror the pure-Python marker format
 * in mercurial/obsolete.py -- verify there before changing */
#define BUMPED_FIX 1
#define USING_SHA_256 2
#define FM1_HEADER_SIZE (4 + 8 + 2 + 2 + 1 + 1 + 1)

/*
 * Read "num" hashes of "hashwidth" bytes each from "source" and
 * return them as a tuple of bytes objects, or NULL on error.
 */
static PyObject *readshas(
	const char *source, unsigned char num, Py_ssize_t hashwidth)
{
	int i;
	PyObject *list = PyTuple_New(num);
	if (list == NULL) {
		return NULL;
	}
	for (i = 0; i < num; i++) {
		PyObject *hash = PyBytes_FromStringAndSize(source, hashwidth);
		if (hash == NULL) {
			Py_DECREF(list);
			return NULL;
		}
		/* PyTuple_SET_ITEM steals the reference to hash */
		PyTuple_SET_ITEM(list, i, hash);
		source += hashwidth;
	}
	return list;
}
2653 2691
/*
 * Parse one v1 ("fm1") obsolescence marker starting at `databegin`.
 *
 * On success, returns a new reference to a tuple
 *   (precursor, successors, flags, metadata, (mtime, tz * 60), parents)
 * and stores the marker's total on-disk size in *msize so the caller
 * can step to the next record.  Returns NULL with ValueError set on
 * truncated/overflowing input, or with the pending exception on
 * allocation failure.
 */
static PyObject *fm1readmarker(const char *databegin, const char *dataend,
			       uint32_t *msize)
{
	const char *data = databegin;
	const char *meta;

	double mtime;
	int16_t tz;
	uint16_t flags;
	unsigned char nsuccs, nparents, nmetadata;
	Py_ssize_t hashwidth = 20;	/* SHA-1 unless USING_SHA_256 flag set */

	PyObject *prec = NULL, *parents = NULL, *succs = NULL;
	PyObject *metadata = NULL, *ret = NULL;
	int i;

	/* The fixed-size header must fit before any of it is read. */
	if (data + FM1_HEADER_SIZE > dataend) {
		goto overflow;
	}

	*msize = getbe32(data);
	data += 4;
	mtime = getbefloat64(data);
	data += 8;
	tz = getbeint16(data);
	data += 2;
	flags = getbeuint16(data);
	data += 2;

	if (flags & USING_SHA_256) {
		hashwidth = 32;
	}

	nsuccs = (unsigned char)(*data++);
	nparents = (unsigned char)(*data++);
	nmetadata = (unsigned char)(*data++);

	/* The marker's self-declared size must not run past the input;
	 * all later bounds checks are against this narrowed end. */
	if (databegin + *msize > dataend) {
		goto overflow;
	}
	dataend = databegin + *msize; /* narrow down to marker size */

	if (data + hashwidth > dataend) {
		goto overflow;
	}
	prec = PyBytes_FromStringAndSize(data, hashwidth);
	data += hashwidth;
	if (prec == NULL) {
		goto bail;
	}

	if (data + nsuccs * hashwidth > dataend) {
		goto overflow;
	}
	succs = readshas(data, nsuccs, hashwidth);
	if (succs == NULL) {
		goto bail;
	}
	data += nsuccs * hashwidth;

	/* Any parent count other than 1 or 2 means "no parent info". */
	if (nparents == 1 || nparents == 2) {
		if (data + nparents * hashwidth > dataend) {
			goto overflow;
		}
		parents = readshas(data, nparents, hashwidth);
		if (parents == NULL) {
			goto bail;
		}
		data += nparents * hashwidth;
	} else {
		/* borrowed reference — the bail path deliberately skips
		 * the decref for this case */
		parents = Py_None;
	}

	/* Metadata layout: nmetadata (keylen, vallen) byte pairs first,
	 * then the concatenated key/value strings starting at `meta`. */
	if (data + 2 * nmetadata > dataend) {
		goto overflow;
	}
	meta = data + (2 * nmetadata);
	metadata = PyTuple_New(nmetadata);
	if (metadata == NULL) {
		goto bail;
	}
	for (i = 0; i < nmetadata; i++) {
		PyObject *tmp, *left = NULL, *right = NULL;
		Py_ssize_t leftsize = (unsigned char)(*data++);
		Py_ssize_t rightsize = (unsigned char)(*data++);
		if (meta + leftsize + rightsize > dataend) {
			goto overflow;
		}
		left = PyBytes_FromStringAndSize(meta, leftsize);
		meta += leftsize;
		right = PyBytes_FromStringAndSize(meta, rightsize);
		meta += rightsize;
		tmp = PyTuple_New(2);
		if (!left || !right || !tmp) {
			Py_XDECREF(left);
			Py_XDECREF(right);
			Py_XDECREF(tmp);
			goto bail;
		}
		PyTuple_SET_ITEM(tmp, 0, left);
		PyTuple_SET_ITEM(tmp, 1, right);
		PyTuple_SET_ITEM(metadata, i, tmp);
	}
	/* "O" takes fresh references, so our own references are released
	 * in the shared cleanup below on success and failure alike. */
	ret = Py_BuildValue("(OOHO(di)O)", prec, succs, flags,
			    metadata, mtime, (int)tz * 60, parents);
	goto bail; /* return successfully */

overflow:
	PyErr_SetString(PyExc_ValueError, "overflow in obsstore");
bail:
	Py_XDECREF(prec);
	Py_XDECREF(succs);
	Py_XDECREF(metadata);
	if (parents != Py_None)
		Py_XDECREF(parents);
	return ret;
}
2771 2809
2772 2810
/*
 * Parse every v1 obsolescence marker in data[offset:stop].
 *
 * Returns a new list containing one fm1readmarker() tuple per marker,
 * or NULL with an exception set on malformed input or allocation
 * failure.
 */
static PyObject *fm1readmarkers(PyObject *self, PyObject *args) {
	const char *data, *dataend;
	/* NOTE(review): "s#" fills an int unless PY_SSIZE_T_CLEAN is
	 * defined before Python.h — confirm against the file header. */
	int datalen;
	Py_ssize_t offset, stop;
	PyObject *markers = NULL;

	if (!PyArg_ParseTuple(args, "s#nn", &data, &datalen, &offset, &stop)) {
		return NULL;
	}
	dataend = data + datalen;
	data += offset;
	markers = PyList_New(0);
	if (!markers) {
		return NULL;
	}
	while (offset < stop) {
		uint32_t msize;
		int error;
		PyObject *record = fm1readmarker(data, dataend, &msize);
		if (!record) {
			goto bail;
		}
		error = PyList_Append(markers, record);
		Py_DECREF(record); /* the list now holds its own reference */
		if (error) {
			goto bail;
		}
		/* each marker reports its own on-disk size; advance past it */
		data += msize;
		offset += msize;
	}
	return markers;
bail:
	Py_DECREF(markers);
	return NULL;
}
2808 2846
/* Module docstring for the C "parsers" extension. */
static char parsers_doc[] = "Efficient content parsing.";

/* Path-encoding entry points referenced by the method table below;
 * they are defined outside the visible part of this translation unit
 * (presumably alongside the dirs/manifest submodules). */
PyObject *encodedir(PyObject *self, PyObject *args);
PyObject *pathencode(PyObject *self, PyObject *args);
PyObject *lowerencode(PyObject *self, PyObject *args);
2814 2852
/* Method table exported by the extension module; each docstring is
 * the Python-visible help text for that function. */
static PyMethodDef methods[] = {
	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
	{"nonnormalentries", nonnormalentries, METH_VARARGS,
	"create a set containing non-normal entries of given dirstate\n"},
	{"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
	"create a set containing non-normal and other parent entries of given "
	"dirstate\n"},
	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
	{"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
	{"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},
	{"dict_new_presized", dict_new_presized, METH_VARARGS,
	 "construct a dict with an expected size\n"},
	{"make_file_foldmap", make_file_foldmap, METH_VARARGS,
	 "make file foldmap\n"},
	{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
	{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
	{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
	{"fm1readmarkers", fm1readmarkers, METH_VARARGS,
	 "parse v1 obsolete markers\n"},
	{NULL, NULL} /* sentinel */
};
2835 2876
/* Initializers for the sibling submodules compiled into this
 * extension; defined outside the visible part of this file. */
void dirs_module_init(PyObject *mod);
void manifest_module_init(PyObject *mod);

/*
 * Shared module setup for the Python 2 and Python 3 entry points:
 * registers the version-check constant, initializes the submodules,
 * readies and exports the `index` and `dirstatetuple` types, and
 * builds the null-revision sentinel entry.
 */
static void module_init(PyObject *mod)
{
	/* This module constant has two purposes. First, it lets us unit test
	 * the ImportError raised without hard-coding any error text. This
	 * means we can change the text in the future without breaking tests,
	 * even across changesets without a recompile. Second, its presence
	 * can be used to determine whether the version-checking logic is
	 * present, which also helps in testing across changesets without a
	 * recompile. Note that this means the pure-Python version of parsers
	 * should not have this module constant. */
	PyModule_AddStringConstant(mod, "versionerrortext", versionerrortext);

	dirs_module_init(mod);
	manifest_module_init(mod);

	indexType.tp_new = PyType_GenericNew;
	if (PyType_Ready(&indexType) < 0 ||
	    PyType_Ready(&dirstateTupleType) < 0)
		return;
	Py_INCREF(&indexType);
	PyModule_AddObject(mod, "index", (PyObject *)&indexType);
	Py_INCREF(&dirstateTupleType);
	PyModule_AddObject(mod, "dirstatetuple",
			   (PyObject *)&dirstateTupleType);

	/* Sentinel index entry for the null revision: zeroed fields,
	 * -1 sentinels, and the 20-byte null node id. */
	nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
				  -1, -1, -1, -1, nullid, 20);
	if (nullentry)
		/* module-lifetime singleton; keep it out of GC tracking */
		PyObject_GC_UnTrack(nullentry);
}
2869 2910
2870 2911 static int check_python_version(void)
2871 2912 {
2872 2913 PyObject *sys = PyImport_ImportModule("sys"), *ver;
2873 2914 long hexversion;
2874 2915 if (!sys)
2875 2916 return -1;
2876 2917 ver = PyObject_GetAttrString(sys, "hexversion");
2877 2918 Py_DECREF(sys);
2878 2919 if (!ver)
2879 2920 return -1;
2880 2921 hexversion = PyInt_AsLong(ver);
2881 2922 Py_DECREF(ver);
2882 2923 /* sys.hexversion is a 32-bit number by default, so the -1 case
2883 2924 * should only occur in unusual circumstances (e.g. if sys.hexversion
2884 2925 * is manually set to an invalid value). */
2885 2926 if ((hexversion == -1) || (hexversion >> 16 != PY_VERSION_HEX >> 16)) {
2886 2927 PyErr_Format(PyExc_ImportError, "%s: The Mercurial extension "
2887 2928 "modules were compiled with Python " PY_VERSION ", but "
2888 2929 "Mercurial is currently using Python with sys.hexversion=%ld: "
2889 2930 "Python %s\n at: %s", versionerrortext, hexversion,
2890 2931 Py_GetVersion(), Py_GetProgramFullPath());
2891 2932 return -1;
2892 2933 }
2893 2934 return 0;
2894 2935 }
2895 2936
#ifdef IS_PY3K
/* Python 3 module definition and entry point. */
static struct PyModuleDef parsers_module = {
	PyModuleDef_HEAD_INIT,
	"parsers",
	parsers_doc,
	-1, /* no per-interpreter state; module keeps static globals */
	methods
};

PyMODINIT_FUNC PyInit_parsers(void)
{
	PyObject *mod;

	/* refuse to load into a Python this build was not compiled for */
	if (check_python_version() == -1)
		return NULL;
	mod = PyModule_Create(&parsers_module);
	module_init(mod);
	return mod;
}
#else
/* Python 2 entry point. */
PyMODINIT_FUNC initparsers(void)
{
	PyObject *mod;

	if (check_python_version() == -1)
		return;
	mod = Py_InitModule3("parsers", methods, parsers_doc);
	module_init(mod);
}
#endif
General Comments 0
You need to be logged in to leave comments. Login now