rust-cpython: using the new bindings from Python...
Georges Racinet
r41150:536c8353 default
@@ -1,2607 +1,2617 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to prevent pyflakes warnings
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 try:
101 from . import rustext
102 rustext.__name__ # force actual import (see hgdemandimport)
103 except ImportError:
104 rustext = None
100 105
101 106 # Aliased for performance.
102 107 _zlibdecompress = zlib.decompress
103 108
104 109 # max size of revlog with inline data
105 110 _maxinline = 131072
106 111 _chunksize = 1048576
107 112
108 113 # Store flag processors (cf. 'addflagprocessor()' to register)
109 114 _flagprocessors = {
110 115 REVIDX_ISCENSORED: None,
111 116 }
112 117
113 118 # Flag processors for REVIDX_ELLIPSIS.
114 119 def ellipsisreadprocessor(rl, text):
115 120 return text, False
116 121
117 122 def ellipsiswriteprocessor(rl, text):
118 123 return text, False
119 124
120 125 def ellipsisrawprocessor(rl, text):
121 126 return False
122 127
123 128 ellipsisprocessor = (
124 129 ellipsisreadprocessor,
125 130 ellipsiswriteprocessor,
126 131 ellipsisrawprocessor,
127 132 )
128 133
129 134 def addflagprocessor(flag, processor):
130 135 """Register a flag processor on a revision data flag.
131 136
132 137 Invariant:
133 138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 140 - Only one flag processor can be registered on a specific flag.
136 141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 142 following signatures:
138 143 - (read) f(self, rawtext) -> text, bool
139 144 - (write) f(self, text) -> rawtext, bool
140 145 - (raw) f(self, rawtext) -> bool
141 146 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 147 directly visible to the user.
143 148 The boolean returned by these transforms is used to determine whether
144 149 the returned text can be used for hash integrity checking. For example,
145 150 if "write" returns False, then "text" is used to generate hash. If
146 151 "write" returns True, that basically means "rawtext" returned by "write"
147 152 should be used to generate hash. Usually, "write" and "read" return
148 153 different booleans. And "raw" returns a same boolean as "write".
149 154
150 155 Note: The 'raw' transform is used for changegroup generation and in some
151 156 debug commands. In this case the transform only indicates whether the
152 157 contents can be used for hash integrity checks.
153 158 """
154 159 _insertflagprocessor(flag, processor, _flagprocessors)
155 160
156 161 def _insertflagprocessor(flag, processor, flagprocessors):
157 162 if not flag & REVIDX_KNOWN_FLAGS:
158 163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
159 164 raise error.ProgrammingError(msg)
160 165 if flag not in REVIDX_FLAGS_ORDER:
161 166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
162 167 raise error.ProgrammingError(msg)
163 168 if flag in flagprocessors:
164 169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
165 170 raise error.Abort(msg)
166 171 flagprocessors[flag] = processor
167 172
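# Illustration (a minimal sketch, not part of this changeset): registering a
# flag processor per the invariants documented above. REVIDX_EXTSTORED is
# used purely as an example flag; in practice the extension that owns a flag
# (e.g. LFS) performs the registration. The identity transforms below mirror
# the ellipsis processors defined earlier in this file.
from mercurial import revlog
from mercurial.revlogutils.constants import REVIDX_EXTSTORED

def _read(rl, rawtext):
    # stored rawtext -> user-visible text; False: not usable for hash checks
    return rawtext, False

def _write(rl, text):
    return text, False

def _raw(rl, rawtext):
    return False

revlog.addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))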
168 173 def getoffset(q):
169 174 return int(q >> 16)
170 175
171 176 def gettype(q):
172 177 return int(q & 0xFFFF)
173 178
174 179 def offset_type(offset, type):
175 180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
176 181 raise ValueError('unknown revlog index flags')
177 182 return int(int(offset) << 16 | type)
178 183
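# Worked example of the packed field built by offset_type() above: the high
# 48 bits of the 8-byte value carry the data offset, the low 16 bits the
# per-revision flags (the flag value 0x1 here is hypothetical).
packed = int(1024) << 16 | 0x1
assert packed >> 16 == 1024      # what getoffset() extracts
assert packed & 0xFFFF == 0x1    # what gettype() extracts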
179 184 @attr.s(slots=True, frozen=True)
180 185 class _revisioninfo(object):
181 186 """Information about a revision that allows building its fulltext
182 187 node: expected hash of the revision
183 188 p1, p2: parent revs of the revision
184 189 btext: built text cache consisting of a one-element list
185 190 cachedelta: (baserev, uncompressed_delta) or None
186 191 flags: flags associated with the revision storage
187 192
188 193 One of btext[0] or cachedelta must be set.
189 194 """
190 195 node = attr.ib()
191 196 p1 = attr.ib()
192 197 p2 = attr.ib()
193 198 btext = attr.ib()
194 199 textlen = attr.ib()
195 200 cachedelta = attr.ib()
196 201 flags = attr.ib()
197 202
198 203 @interfaceutil.implementer(repository.irevisiondelta)
199 204 @attr.s(slots=True)
200 205 class revlogrevisiondelta(object):
201 206 node = attr.ib()
202 207 p1node = attr.ib()
203 208 p2node = attr.ib()
204 209 basenode = attr.ib()
205 210 flags = attr.ib()
206 211 baserevisionsize = attr.ib()
207 212 revision = attr.ib()
208 213 delta = attr.ib()
209 214 linknode = attr.ib(default=None)
210 215
211 216 @interfaceutil.implementer(repository.iverifyproblem)
212 217 @attr.s(frozen=True)
213 218 class revlogproblem(object):
214 219 warning = attr.ib(default=None)
215 220 error = attr.ib(default=None)
216 221 node = attr.ib(default=None)
217 222
218 223 # index v0:
219 224 # 4 bytes: offset
220 225 # 4 bytes: compressed length
221 226 # 4 bytes: base rev
222 227 # 4 bytes: link rev
223 228 # 20 bytes: parent 1 nodeid
224 229 # 20 bytes: parent 2 nodeid
225 230 # 20 bytes: nodeid
226 231 indexformatv0 = struct.Struct(">4l20s20s20s")
227 232 indexformatv0_pack = indexformatv0.pack
228 233 indexformatv0_unpack = indexformatv0.unpack
229 234
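# Self-contained sketch of one v0 index record, following the layout in the
# comment above: four big-endian 32-bit ints plus three 20-byte nodeids.
import struct

fmt = struct.Struct(">4l20s20s20s")
assert fmt.size == 4 * 4 + 3 * 20            # 76 bytes per entry
record = fmt.pack(0, 11, 0, 0, b"\x11" * 20, b"\x00" * 20, b"\x22" * 20)
offset, clen, baserev, linkrev, p1, p2, node = fmt.unpack(record)
assert (offset, clen, node) == (0, 11, b"\x22" * 20)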
230 235 class revlogoldindex(list):
231 236 def __getitem__(self, i):
232 237 if i == -1:
233 238 return (0, 0, 0, -1, -1, -1, -1, nullid)
234 239 return list.__getitem__(self, i)
235 240
236 241 class revlogoldio(object):
237 242 def __init__(self):
238 243 self.size = indexformatv0.size
239 244
240 245 def parseindex(self, data, inline):
241 246 s = self.size
242 247 index = []
243 248 nodemap = {nullid: nullrev}
244 249 n = off = 0
245 250 l = len(data)
246 251 while off + s <= l:
247 252 cur = data[off:off + s]
248 253 off += s
249 254 e = indexformatv0_unpack(cur)
250 255 # transform to revlogv1 format
251 256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
252 257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
253 258 index.append(e2)
254 259 nodemap[e[6]] = n
255 260 n += 1
256 261
257 262 return revlogoldindex(index), nodemap, None
258 263
259 264 def packentry(self, entry, node, version, rev):
260 265 if gettype(entry[0]):
261 266 raise error.RevlogError(_('index entry flags need revlog '
262 267 'version 1'))
263 268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
264 269 node(entry[5]), node(entry[6]), entry[7])
265 270 return indexformatv0_pack(*e2)
266 271
267 272 # index ng:
268 273 # 6 bytes: offset
269 274 # 2 bytes: flags
270 275 # 4 bytes: compressed length
271 276 # 4 bytes: uncompressed length
272 277 # 4 bytes: base rev
273 278 # 4 bytes: link rev
274 279 # 4 bytes: parent 1 rev
275 280 # 4 bytes: parent 2 rev
276 281 # 32 bytes: nodeid
277 282 indexformatng = struct.Struct(">Qiiiiii20s12x")
278 283 indexformatng_pack = indexformatng.pack
279 284 versionformat = struct.Struct(">I")
280 285 versionformat_pack = versionformat.pack
281 286 versionformat_unpack = versionformat.unpack
282 287
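# The "ng" (v1) entry packs offset and flags into a single 64-bit field and
# pads each record to a fixed 64 bytes; a quick check of the layout above:
import struct

fmt = struct.Struct(">Qiiiiii20s12x")
assert fmt.size == 8 + 6 * 4 + 20 + 12       # 64 bytes per entry
entry = (1024 << 16 | 0, 30, 60, 2, 5, 1, 2, b"\x33" * 20)
assert fmt.unpack(fmt.pack(*entry))[0] >> 16 == 1024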
283 288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
284 289 # signed integer)
285 290 _maxentrysize = 0x7fffffff
286 291
287 292 class revlogio(object):
288 293 def __init__(self):
289 294 self.size = indexformatng.size
290 295
291 296 def parseindex(self, data, inline):
292 297 # call the C implementation to parse the index data
293 298 index, cache = parsers.parse_index2(data, inline)
294 299 return index, getattr(index, 'nodemap', None), cache
295 300
296 301 def packentry(self, entry, node, version, rev):
297 302 p = indexformatng_pack(*entry)
298 303 if rev == 0:
299 304 p = versionformat_pack(version) + p[4:]
300 305 return p
301 306
302 307 class revlog(object):
303 308 """
304 309 the underlying revision storage object
305 310
306 311 A revlog consists of two parts, an index and the revision data.
307 312
308 313 The index is a file with a fixed record size containing
309 314 information on each revision, including its nodeid (hash), the
310 315 nodeids of its parents, the position and offset of its data within
311 316 the data file, and the revision it's based on. Finally, each entry
312 317 contains a linkrev entry that can serve as a pointer to external
313 318 data.
314 319
315 320 The revision data itself is a linear collection of data chunks.
316 321 Each chunk represents a revision and is usually represented as a
317 322 delta against the previous chunk. To bound lookup time, runs of
318 323 deltas are limited to about 2 times the length of the original
319 324 version data. This makes retrieval of a version proportional to
320 325 its size, or O(1) relative to the number of revisions.
321 326
322 327 Both pieces of the revlog are written to in an append-only
323 328 fashion, which means we never need to rewrite a file to insert or
324 329 remove data, and can use some simple techniques to avoid the need
325 330 for locking while reading.
326 331
327 332 If checkambig, indexfile is opened with checkambig=True at
328 333 writing, to avoid file stat ambiguity.
329 334
330 335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
331 336 index will be mmapped rather than read if it is larger than the
332 337 configured threshold.
333 338
334 339 If censorable is True, the revlog can have censored revisions.
335 340 """
336 341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
337 342 mmaplargeindex=False, censorable=False):
338 343 """
339 344 create a revlog object
340 345
341 346 opener is a function that abstracts the file opening operation
342 347 and can be used to implement COW semantics or the like.
343 348 """
344 349 self.indexfile = indexfile
345 350 self.datafile = datafile or (indexfile[:-2] + ".d")
346 351 self.opener = opener
347 352 # When True, indexfile is opened with checkambig=True at writing, to
348 353 # avoid file stat ambiguity.
349 354 self._checkambig = checkambig
350 355 self._censorable = censorable
351 356 # 3-tuple of (node, rev, text) for a raw revision.
352 357 self._revisioncache = None
353 358 # Maps rev to chain base rev.
354 359 self._chainbasecache = util.lrucachedict(100)
355 360 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
356 361 self._chunkcache = (0, '')
357 362 # How much data to read and cache into the raw revlog data cache.
358 363 self._chunkcachesize = 65536
359 364 self._maxchainlen = None
360 365 self._deltabothparents = True
361 366 self.index = []
362 367 # Mapping of partial identifiers to full nodes.
363 368 self._pcache = {}
364 369 # Mapping of revision integer to full node.
365 370 self._nodecache = {nullid: nullrev}
366 371 self._nodepos = None
367 372 self._compengine = 'zlib'
368 373 self._maxdeltachainspan = -1
369 374 self._withsparseread = False
370 375 self._sparserevlog = False
371 376 self._srdensitythreshold = 0.50
372 377 self._srmingapsize = 262144
373 378
374 379 # Make copy of flag processors so each revlog instance can support
375 380 # custom flags.
376 381 self._flagprocessors = dict(_flagprocessors)
377 382
378 383 # 2-tuple of file handles being used for active writing.
379 384 self._writinghandles = None
380 385
381 386 mmapindexthreshold = None
382 387 v = REVLOG_DEFAULT_VERSION
383 388 opts = getattr(opener, 'options', None)
384 389 if opts is not None:
385 390 if 'revlogv2' in opts:
386 391 # version 2 revlogs always use generaldelta.
387 392 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
388 393 elif 'revlogv1' in opts:
389 394 if 'generaldelta' in opts:
390 395 v |= FLAG_GENERALDELTA
391 396 else:
392 397 v = 0
393 398 if 'chunkcachesize' in opts:
394 399 self._chunkcachesize = opts['chunkcachesize']
395 400 if 'maxchainlen' in opts:
396 401 self._maxchainlen = opts['maxchainlen']
397 402 if 'deltabothparents' in opts:
398 403 self._deltabothparents = opts['deltabothparents']
399 404 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
400 405 if 'compengine' in opts:
401 406 self._compengine = opts['compengine']
402 407 if 'maxdeltachainspan' in opts:
403 408 self._maxdeltachainspan = opts['maxdeltachainspan']
404 409 if mmaplargeindex and 'mmapindexthreshold' in opts:
405 410 mmapindexthreshold = opts['mmapindexthreshold']
406 411 self._sparserevlog = bool(opts.get('sparse-revlog', False))
407 412 withsparseread = bool(opts.get('with-sparse-read', False))
408 413 # sparse-revlog forces sparse-read
409 414 self._withsparseread = self._sparserevlog or withsparseread
410 415 if 'sparse-read-density-threshold' in opts:
411 416 self._srdensitythreshold = opts['sparse-read-density-threshold']
412 417 if 'sparse-read-min-gap-size' in opts:
413 418 self._srmingapsize = opts['sparse-read-min-gap-size']
414 419 if opts.get('enableellipsis'):
415 420 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
416 421
417 422 # revlog v0 doesn't have flag processors
418 423 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
419 424 _insertflagprocessor(flag, processor, self._flagprocessors)
420 425
421 426 if self._chunkcachesize <= 0:
422 427 raise error.RevlogError(_('revlog chunk cache size %r is not '
423 428 'greater than 0') % self._chunkcachesize)
424 429 elif self._chunkcachesize & (self._chunkcachesize - 1):
425 430 raise error.RevlogError(_('revlog chunk cache size %r is not a '
426 431 'power of 2') % self._chunkcachesize)
427 432
428 433 self._loadindex(v, mmapindexthreshold)
429 434
430 435 def _loadindex(self, v, mmapindexthreshold):
431 436 indexdata = ''
432 437 self._initempty = True
433 438 try:
434 439 with self._indexfp() as f:
435 440 if (mmapindexthreshold is not None and
436 441 self.opener.fstat(f).st_size >= mmapindexthreshold):
437 442 indexdata = util.buffer(util.mmapread(f))
438 443 else:
439 444 indexdata = f.read()
440 445 if len(indexdata) > 0:
441 446 v = versionformat_unpack(indexdata[:4])[0]
442 447 self._initempty = False
443 448 except IOError as inst:
444 449 if inst.errno != errno.ENOENT:
445 450 raise
446 451
447 452 self.version = v
448 453 self._inline = v & FLAG_INLINE_DATA
449 454 self._generaldelta = v & FLAG_GENERALDELTA
450 455 flags = v & ~0xFFFF
451 456 fmt = v & 0xFFFF
452 457 if fmt == REVLOGV0:
453 458 if flags:
454 459 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
455 460 'revlog %s') %
456 461 (flags >> 16, fmt, self.indexfile))
457 462 elif fmt == REVLOGV1:
458 463 if flags & ~REVLOGV1_FLAGS:
459 464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
460 465 'revlog %s') %
461 466 (flags >> 16, fmt, self.indexfile))
462 467 elif fmt == REVLOGV2:
463 468 if flags & ~REVLOGV2_FLAGS:
464 469 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 470 'revlog %s') %
466 471 (flags >> 16, fmt, self.indexfile))
467 472 else:
468 473 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
469 474 (fmt, self.indexfile))
470 475
471 476 self._storedeltachains = True
472 477
473 478 self._io = revlogio()
474 479 if self.version == REVLOGV0:
475 480 self._io = revlogoldio()
476 481 try:
477 482 d = self._io.parseindex(indexdata, self._inline)
478 483 except (ValueError, IndexError):
479 484 raise error.RevlogError(_("index %s is corrupted") %
480 485 self.indexfile)
481 486 self.index, nodemap, self._chunkcache = d
482 487 if nodemap is not None:
483 488 self.nodemap = self._nodecache = nodemap
484 489 if not self._chunkcache:
485 490 self._chunkclear()
486 491 # revnum -> (chain-length, sum-delta-length)
487 492 self._chaininfocache = {}
488 493 # revlog header -> revlog compressor
489 494 self._decompressors = {}
490 495
491 496 @util.propertycache
492 497 def _compressor(self):
493 498 return util.compengines[self._compengine].revlogcompressor()
494 499
495 500 def _indexfp(self, mode='r'):
496 501 """file object for the revlog's index file"""
497 502 args = {r'mode': mode}
498 503 if mode != 'r':
499 504 args[r'checkambig'] = self._checkambig
500 505 if mode == 'w':
501 506 args[r'atomictemp'] = True
502 507 return self.opener(self.indexfile, **args)
503 508
504 509 def _datafp(self, mode='r'):
505 510 """file object for the revlog's data file"""
506 511 return self.opener(self.datafile, mode=mode)
507 512
508 513 @contextlib.contextmanager
509 514 def _datareadfp(self, existingfp=None):
510 515 """file object suitable to read data"""
511 516 # Use explicit file handle, if given.
512 517 if existingfp is not None:
513 518 yield existingfp
514 519
515 520 # Use a file handle being actively used for writes, if available.
516 521 # There is some danger to doing this because reads will seek the
517 522 # file. However, _writeentry() performs a SEEK_END before all writes,
518 523 # so we should be safe.
519 524 elif self._writinghandles:
520 525 if self._inline:
521 526 yield self._writinghandles[0]
522 527 else:
523 528 yield self._writinghandles[1]
524 529
525 530 # Otherwise open a new file handle.
526 531 else:
527 532 if self._inline:
528 533 func = self._indexfp
529 534 else:
530 535 func = self._datafp
531 536 with func() as fp:
532 537 yield fp
533 538
534 539 def tip(self):
535 540 return self.node(len(self.index) - 1)
536 541 def __contains__(self, rev):
537 542 return 0 <= rev < len(self)
538 543 def __len__(self):
539 544 return len(self.index)
540 545 def __iter__(self):
541 546 return iter(pycompat.xrange(len(self)))
542 547 def revs(self, start=0, stop=None):
543 548 """iterate over all rev in this revlog (from start to stop)"""
544 549 return storageutil.iterrevs(len(self), start=start, stop=stop)
545 550
546 551 @util.propertycache
547 552 def nodemap(self):
548 553 if self.index:
549 554 # populate mapping down to the initial node
550 555 node0 = self.index[0][7] # get around changelog filtering
551 556 self.rev(node0)
552 557 return self._nodecache
553 558
554 559 def hasnode(self, node):
555 560 try:
556 561 self.rev(node)
557 562 return True
558 563 except KeyError:
559 564 return False
560 565
561 566 def candelta(self, baserev, rev):
562 567 """whether two revisions (baserev, rev) can be delta-ed or not"""
563 568 # Disable delta if either rev requires a content-changing flag
564 569 # processor (ex. LFS). This is because such a flag processor can alter
565 570 # the rawtext content that the delta will be based on, and two clients
566 571 # could have the same revlog node with different flags (i.e. different
567 572 # rawtext contents) and the delta could be incompatible.
568 573 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
569 574 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
570 575 return False
571 576 return True
572 577
573 578 def clearcaches(self):
574 579 self._revisioncache = None
575 580 self._chainbasecache.clear()
576 581 self._chunkcache = (0, '')
577 582 self._pcache = {}
578 583
579 584 try:
580 585 self._nodecache.clearcaches()
581 586 except AttributeError:
582 587 self._nodecache = {nullid: nullrev}
583 588 self._nodepos = None
584 589
585 590 def rev(self, node):
586 591 try:
587 592 return self._nodecache[node]
588 593 except TypeError:
589 594 raise
590 595 except error.RevlogError:
591 596 # parsers.c radix tree lookup failed
592 597 if node == wdirid or node in wdirfilenodeids:
593 598 raise error.WdirUnsupported
594 599 raise error.LookupError(node, self.indexfile, _('no node'))
595 600 except KeyError:
596 601 # pure python cache lookup failed
597 602 n = self._nodecache
598 603 i = self.index
599 604 p = self._nodepos
600 605 if p is None:
601 606 p = len(i) - 1
602 607 else:
603 608 assert p < len(i)
604 609 for r in pycompat.xrange(p, -1, -1):
605 610 v = i[r][7]
606 611 n[v] = r
607 612 if v == node:
608 613 self._nodepos = r - 1
609 614 return r
610 615 if node == wdirid or node in wdirfilenodeids:
611 616 raise error.WdirUnsupported
612 617 raise error.LookupError(node, self.indexfile, _('no node'))
613 618
614 619 # Accessors for index entries.
615 620
616 621 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
617 622 # are flags.
618 623 def start(self, rev):
619 624 return int(self.index[rev][0] >> 16)
620 625
621 626 def flags(self, rev):
622 627 return self.index[rev][0] & 0xFFFF
623 628
624 629 def length(self, rev):
625 630 return self.index[rev][1]
626 631
627 632 def rawsize(self, rev):
628 633 """return the length of the uncompressed text for a given revision"""
629 634 l = self.index[rev][2]
630 635 if l >= 0:
631 636 return l
632 637
633 638 t = self.revision(rev, raw=True)
634 639 return len(t)
635 640
636 641 def size(self, rev):
637 642 """length of non-raw text (processed by a "read" flag processor)"""
638 643 # fast path: if no "read" flag processor could change the content,
639 644 # size is rawsize. note: ELLIPSIS is known to not change the content.
640 645 flags = self.flags(rev)
641 646 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
642 647 return self.rawsize(rev)
643 648
644 649 return len(self.revision(rev, raw=False))
645 650
646 651 def chainbase(self, rev):
647 652 base = self._chainbasecache.get(rev)
648 653 if base is not None:
649 654 return base
650 655
651 656 index = self.index
652 657 iterrev = rev
653 658 base = index[iterrev][3]
654 659 while base != iterrev:
655 660 iterrev = base
656 661 base = index[iterrev][3]
657 662
658 663 self._chainbasecache[rev] = base
659 664 return base
660 665
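# Toy illustration of the walk in chainbase() above: entry[3] holds a rev's
# delta base, so we follow bases until a revision is its own base (i.e. a
# full snapshot), caching aside.
base_of = {0: 0, 1: 0, 2: 1}          # hypothetical delta chain 2 -> 1 -> 0
rev = 2
base = base_of[rev]
while base != rev:
    rev = base
    base = base_of[rev]
assert base == 0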
661 666 def linkrev(self, rev):
662 667 return self.index[rev][4]
663 668
664 669 def parentrevs(self, rev):
665 670 try:
666 671 entry = self.index[rev]
667 672 except IndexError:
668 673 if rev == wdirrev:
669 674 raise error.WdirUnsupported
670 675 raise
671 676
672 677 return entry[5], entry[6]
673 678
674 679 # fast parentrevs(rev) where rev isn't filtered
675 680 _uncheckedparentrevs = parentrevs
676 681
677 682 def node(self, rev):
678 683 try:
679 684 return self.index[rev][7]
680 685 except IndexError:
681 686 if rev == wdirrev:
682 687 raise error.WdirUnsupported
683 688 raise
684 689
685 690 # Derived from index values.
686 691
687 692 def end(self, rev):
688 693 return self.start(rev) + self.length(rev)
689 694
690 695 def parents(self, node):
691 696 i = self.index
692 697 d = i[self.rev(node)]
693 698 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
694 699
695 700 def chainlen(self, rev):
696 701 return self._chaininfo(rev)[0]
697 702
698 703 def _chaininfo(self, rev):
699 704 chaininfocache = self._chaininfocache
700 705 if rev in chaininfocache:
701 706 return chaininfocache[rev]
702 707 index = self.index
703 708 generaldelta = self._generaldelta
704 709 iterrev = rev
705 710 e = index[iterrev]
706 711 clen = 0
707 712 compresseddeltalen = 0
708 713 while iterrev != e[3]:
709 714 clen += 1
710 715 compresseddeltalen += e[1]
711 716 if generaldelta:
712 717 iterrev = e[3]
713 718 else:
714 719 iterrev -= 1
715 720 if iterrev in chaininfocache:
716 721 t = chaininfocache[iterrev]
717 722 clen += t[0]
718 723 compresseddeltalen += t[1]
719 724 break
720 725 e = index[iterrev]
721 726 else:
722 727 # Add text length of base since decompressing that also takes
723 728 # work. For cache hits the length is already included.
724 729 compresseddeltalen += e[1]
725 730 r = (clen, compresseddeltalen)
726 731 chaininfocache[rev] = r
727 732 return r
728 733
729 734 def _deltachain(self, rev, stoprev=None):
730 735 """Obtain the delta chain for a revision.
731 736
732 737 ``stoprev`` specifies a revision to stop at. If not specified, we
733 738 stop at the base of the chain.
734 739
735 740 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
736 741 revs in ascending order and ``stopped`` is a bool indicating whether
737 742 ``stoprev`` was hit.
738 743 """
739 744 # Try C implementation.
740 745 try:
741 746 return self.index.deltachain(rev, stoprev, self._generaldelta)
742 747 except AttributeError:
743 748 pass
744 749
745 750 chain = []
746 751
747 752 # Alias to prevent attribute lookup in tight loop.
748 753 index = self.index
749 754 generaldelta = self._generaldelta
750 755
751 756 iterrev = rev
752 757 e = index[iterrev]
753 758 while iterrev != e[3] and iterrev != stoprev:
754 759 chain.append(iterrev)
755 760 if generaldelta:
756 761 iterrev = e[3]
757 762 else:
758 763 iterrev -= 1
759 764 e = index[iterrev]
760 765
761 766 if iterrev == stoprev:
762 767 stopped = True
763 768 else:
764 769 chain.append(iterrev)
765 770 stopped = False
766 771
767 772 chain.reverse()
768 773 return chain, stopped
769 774
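# Sketch of how a fulltext is rebuilt from the chain returned above (assumes
# an existing revlog instance `rl` and an unfiltered `rev`; the real work
# happens in revision() via _chunks() and mdiff.patches()).
from mercurial import mdiff

chain, stopped = rl._deltachain(rev)
assert not stopped                     # no cached base rev was supplied
text = bytes(rl._chunk(chain[0]))      # chain base: a full snapshot
for r in chain[1:]:
    text = mdiff.patches(text, [rl._chunk(r)])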
770 775 def ancestors(self, revs, stoprev=0, inclusive=False):
771 776 """Generate the ancestors of 'revs' in reverse revision order.
772 777 Does not generate revs lower than stoprev.
773 778
774 779 See the documentation for ancestor.lazyancestors for more details."""
775 780
776 781 # first, make sure start revisions aren't filtered
777 782 revs = list(revs)
778 783 checkrev = self.node
779 784 for r in revs:
780 785 checkrev(r)
781 786 # and we're sure ancestors aren't filtered as well
782 if util.safehasattr(parsers, 'rustlazyancestors'):
783 return ancestor.rustlazyancestors(
784 self.index, revs,
785 stoprev=stoprev, inclusive=inclusive)
786 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
787 stoprev=stoprev, inclusive=inclusive)
787
788 if rustext is not None:
789 lazyancestors = rustext.ancestor.LazyAncestors
790 arg = self.index
791 elif util.safehasattr(parsers, 'rustlazyancestors'):
792 lazyancestors = ancestor.rustlazyancestors
793 arg = self.index
794 else:
795 lazyancestors = ancestor.lazyancestors
796 arg = self._uncheckedparentrevs
797 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
788 798
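# Usage sketch for the three-way dispatch added above (assumes a revlog
# instance `rl` with unfiltered revs 5 and 7): whichever backend is picked
# (the new rust-cpython LazyAncestors, the parsers-based rustlazyancestors,
# or the pure-Python lazyancestors), the returned object iterates the
# ancestors of the given revs in reverse revision order.
ancs = list(rl.ancestors([5, 7], stoprev=0, inclusive=True))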
789 799 def descendants(self, revs):
790 800 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
791 801
792 802 def findcommonmissing(self, common=None, heads=None):
793 803 """Return a tuple of the ancestors of common and the ancestors of heads
794 804 that are not ancestors of common. In revset terminology, we return the
795 805 tuple:
796 806
797 807 ::common, (::heads) - (::common)
798 808
799 809 The list is sorted by revision number, meaning it is
800 810 topologically sorted.
801 811
802 812 'heads' and 'common' are both lists of node IDs. If heads is
803 813 not supplied, uses all of the revlog's heads. If common is not
804 814 supplied, uses nullid."""
805 815 if common is None:
806 816 common = [nullid]
807 817 if heads is None:
808 818 heads = self.heads()
809 819
810 820 common = [self.rev(n) for n in common]
811 821 heads = [self.rev(n) for n in heads]
812 822
813 823 # we want the ancestors, but inclusive
814 824 class lazyset(object):
815 825 def __init__(self, lazyvalues):
816 826 self.addedvalues = set()
817 827 self.lazyvalues = lazyvalues
818 828
819 829 def __contains__(self, value):
820 830 return value in self.addedvalues or value in self.lazyvalues
821 831
822 832 def __iter__(self):
823 833 added = self.addedvalues
824 834 for r in added:
825 835 yield r
826 836 for r in self.lazyvalues:
827 837 if r not in added:
828 838 yield r
829 839
830 840 def add(self, value):
831 841 self.addedvalues.add(value)
832 842
833 843 def update(self, values):
834 844 self.addedvalues.update(values)
835 845
836 846 has = lazyset(self.ancestors(common))
837 847 has.add(nullrev)
838 848 has.update(common)
839 849
840 850 # take all ancestors from heads that aren't in has
841 851 missing = set()
842 852 visit = collections.deque(r for r in heads if r not in has)
843 853 while visit:
844 854 r = visit.popleft()
845 855 if r in missing:
846 856 continue
847 857 else:
848 858 missing.add(r)
849 859 for p in self.parentrevs(r):
850 860 if p not in has:
851 861 visit.append(p)
852 862 missing = list(missing)
853 863 missing.sort()
854 864 return has, [self.node(miss) for miss in missing]
855 865
856 866 def incrementalmissingrevs(self, common=None):
857 867 """Return an object that can be used to incrementally compute the
858 868 revision numbers of the ancestors of arbitrary sets that are not
859 869 ancestors of common. This is an ancestor.incrementalmissingancestors
860 870 object.
861 871
862 872 'common' is a list of revision numbers. If common is not supplied, uses
863 873 nullrev.
864 874 """
865 875 if common is None:
866 876 common = [nullrev]
867 877
868 878 return ancestor.incrementalmissingancestors(self.parentrevs, common)
869 879
870 880 def findmissingrevs(self, common=None, heads=None):
871 881 """Return the revision numbers of the ancestors of heads that
872 882 are not ancestors of common.
873 883
874 884 More specifically, return a list of revision numbers corresponding to
875 885 nodes N such that every N satisfies the following constraints:
876 886
877 887 1. N is an ancestor of some node in 'heads'
878 888 2. N is not an ancestor of any node in 'common'
879 889
880 890 The list is sorted by revision number, meaning it is
881 891 topologically sorted.
882 892
883 893 'heads' and 'common' are both lists of revision numbers. If heads is
884 894 not supplied, uses all of the revlog's heads. If common is not
885 895 supplied, uses nullrev."""
886 896 if common is None:
887 897 common = [nullrev]
888 898 if heads is None:
889 899 heads = self.headrevs()
890 900
891 901 inc = self.incrementalmissingrevs(common=common)
892 902 return inc.missingancestors(heads)
893 903
894 904 def findmissing(self, common=None, heads=None):
895 905 """Return the ancestors of heads that are not ancestors of common.
896 906
897 907 More specifically, return a list of nodes N such that every N
898 908 satisfies the following constraints:
899 909
900 910 1. N is an ancestor of some node in 'heads'
901 911 2. N is not an ancestor of any node in 'common'
902 912
903 913 The list is sorted by revision number, meaning it is
904 914 topologically sorted.
905 915
906 916 'heads' and 'common' are both lists of node IDs. If heads is
907 917 not supplied, uses all of the revlog's heads. If common is not
908 918 supplied, uses nullid."""
909 919 if common is None:
910 920 common = [nullid]
911 921 if heads is None:
912 922 heads = self.heads()
913 923
914 924 common = [self.rev(n) for n in common]
915 925 heads = [self.rev(n) for n in heads]
916 926
917 927 inc = self.incrementalmissingrevs(common=common)
918 928 return [self.node(r) for r in inc.missingancestors(heads)]
919 929
920 930 def nodesbetween(self, roots=None, heads=None):
921 931 """Return a topological path from 'roots' to 'heads'.
922 932
923 933 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
924 934 topologically sorted list of all nodes N that satisfy both of
925 935 these constraints:
926 936
927 937 1. N is a descendant of some node in 'roots'
928 938 2. N is an ancestor of some node in 'heads'
929 939
930 940 Every node is considered to be both a descendant and an ancestor
931 941 of itself, so every reachable node in 'roots' and 'heads' will be
932 942 included in 'nodes'.
933 943
934 944 'outroots' is the list of reachable nodes in 'roots', i.e., the
935 945 subset of 'roots' that is returned in 'nodes'. Likewise,
936 946 'outheads' is the subset of 'heads' that is also in 'nodes'.
937 947
938 948 'roots' and 'heads' are both lists of node IDs. If 'roots' is
939 949 unspecified, uses nullid as the only root. If 'heads' is
940 950 unspecified, uses the list of all of the revlog's heads."""
941 951 nonodes = ([], [], [])
942 952 if roots is not None:
943 953 roots = list(roots)
944 954 if not roots:
945 955 return nonodes
946 956 lowestrev = min([self.rev(n) for n in roots])
947 957 else:
948 958 roots = [nullid] # Everybody's a descendant of nullid
949 959 lowestrev = nullrev
950 960 if (lowestrev == nullrev) and (heads is None):
951 961 # We want _all_ the nodes!
952 962 return ([self.node(r) for r in self], [nullid], list(self.heads()))
953 963 if heads is None:
954 964 # All nodes are ancestors, so the latest ancestor is the last
955 965 # node.
956 966 highestrev = len(self) - 1
957 967 # Set ancestors to None to signal that every node is an ancestor.
958 968 ancestors = None
959 969 # Set heads to an empty dictionary for later discovery of heads
960 970 heads = {}
961 971 else:
962 972 heads = list(heads)
963 973 if not heads:
964 974 return nonodes
965 975 ancestors = set()
966 976 # Turn heads into a dictionary so we can remove 'fake' heads.
967 977 # Also, later we will be using it to filter out the heads we can't
968 978 # find from roots.
969 979 heads = dict.fromkeys(heads, False)
970 980 # Start at the top and keep marking parents until we're done.
971 981 nodestotag = set(heads)
972 982 # Remember where the top was so we can use it as a limit later.
973 983 highestrev = max([self.rev(n) for n in nodestotag])
974 984 while nodestotag:
975 985 # grab a node to tag
976 986 n = nodestotag.pop()
977 987 # Never tag nullid
978 988 if n == nullid:
979 989 continue
980 990 # A node's revision number represents its place in a
981 991 # topologically sorted list of nodes.
982 992 r = self.rev(n)
983 993 if r >= lowestrev:
984 994 if n not in ancestors:
985 995 # If we are possibly a descendant of one of the roots
986 996 # and we haven't already been marked as an ancestor
987 997 ancestors.add(n) # Mark as ancestor
988 998 # Add non-nullid parents to list of nodes to tag.
989 999 nodestotag.update([p for p in self.parents(n) if
990 1000 p != nullid])
991 1001 elif n in heads: # We've seen it before, is it a fake head?
992 1002 # So it is, real heads should not be the ancestors of
993 1003 # any other heads.
994 1004 heads.pop(n)
995 1005 if not ancestors:
996 1006 return nonodes
997 1007 # Now that we have our set of ancestors, we want to remove any
998 1008 # roots that are not ancestors.
999 1009
1000 1010 # If one of the roots was nullid, everything is included anyway.
1001 1011 if lowestrev > nullrev:
1002 1012 # But, since we weren't, let's recompute the lowest rev to not
1003 1013 # include roots that aren't ancestors.
1004 1014
1005 1015 # Filter out roots that aren't ancestors of heads
1006 1016 roots = [root for root in roots if root in ancestors]
1007 1017 # Recompute the lowest revision
1008 1018 if roots:
1009 1019 lowestrev = min([self.rev(root) for root in roots])
1010 1020 else:
1011 1021 # No more roots? Return empty list
1012 1022 return nonodes
1013 1023 else:
1014 1024 # We are descending from nullid, and don't need to care about
1015 1025 # any other roots.
1016 1026 lowestrev = nullrev
1017 1027 roots = [nullid]
1018 1028 # Transform our roots list into a set.
1019 1029 descendants = set(roots)
1020 1030 # Also, keep the original roots so we can filter out roots that aren't
1021 1031 # 'real' roots (i.e. are descended from other roots).
1022 1032 roots = descendants.copy()
1023 1033 # Our topologically sorted list of output nodes.
1024 1034 orderedout = []
1025 1035 # Don't start at nullid since we don't want nullid in our output list,
1026 1036 # and if nullid shows up in descendants, empty parents will look like
1027 1037 # they're descendants.
1028 1038 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1029 1039 n = self.node(r)
1030 1040 isdescendant = False
1031 1041 if lowestrev == nullrev: # Everybody is a descendant of nullid
1032 1042 isdescendant = True
1033 1043 elif n in descendants:
1034 1044 # n is already a descendant
1035 1045 isdescendant = True
1036 1046 # This check only needs to be done here because all the roots
1037 1047 # will start being marked as descendants before the loop.
1038 1048 if n in roots:
1039 1049 # If n was a root, check if it's a 'real' root.
1040 1050 p = tuple(self.parents(n))
1041 1051 # If any of its parents are descendants, it's not a root.
1042 1052 if (p[0] in descendants) or (p[1] in descendants):
1043 1053 roots.remove(n)
1044 1054 else:
1045 1055 p = tuple(self.parents(n))
1046 1056 # A node is a descendant if either of its parents are
1047 1057 # descendants. (We seeded the descendants set with the roots
1048 1058 # up there, remember?)
1049 1059 if (p[0] in descendants) or (p[1] in descendants):
1050 1060 descendants.add(n)
1051 1061 isdescendant = True
1052 1062 if isdescendant and ((ancestors is None) or (n in ancestors)):
1053 1063 # Only include nodes that are both descendants and ancestors.
1054 1064 orderedout.append(n)
1055 1065 if (ancestors is not None) and (n in heads):
1056 1066 # We're trying to figure out which heads are reachable
1057 1067 # from roots.
1058 1068 # Mark this head as having been reached
1059 1069 heads[n] = True
1060 1070 elif ancestors is None:
1061 1071 # Otherwise, we're trying to discover the heads.
1062 1072 # Assume this is a head because if it isn't, the next step
1063 1073 # will eventually remove it.
1064 1074 heads[n] = True
1065 1075 # But, obviously its parents aren't.
1066 1076 for p in self.parents(n):
1067 1077 heads.pop(p, None)
1068 1078 heads = [head for head, flag in heads.iteritems() if flag]
1069 1079 roots = list(roots)
1070 1080 assert orderedout
1071 1081 assert roots
1072 1082 assert heads
1073 1083 return (orderedout, roots, heads)
1074 1084
1075 1085 def headrevs(self):
1076 1086 try:
1077 1087 return self.index.headrevs()
1078 1088 except AttributeError:
1079 1089 return self._headrevs()
1080 1090
1081 1091 def computephases(self, roots):
1082 1092 return self.index.computephasesmapsets(roots)
1083 1093
1084 1094 def _headrevs(self):
1085 1095 count = len(self)
1086 1096 if not count:
1087 1097 return [nullrev]
1088 1098 # we won't iter over filtered rev so nobody is a head at start
1089 1099 ishead = [0] * (count + 1)
1090 1100 index = self.index
1091 1101 for r in self:
1092 1102 ishead[r] = 1 # I may be a head
1093 1103 e = index[r]
1094 1104 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1095 1105 return [r for r, val in enumerate(ishead) if val]
1096 1106
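# Toy version of the marking pass in _headrevs() above: every rev starts as
# a candidate head, then each rev clears its parents. The list carries one
# extra slot so that writes for nullrev parents (-1) land in a sentinel.
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}   # revs 1 and 2 are heads
ishead = [0] * (len(parents) + 1)
for r in sorted(parents):
    ishead[r] = 1
    p1, p2 = parents[r]
    ishead[p1] = ishead[p2] = 0
assert [r for r, v in enumerate(ishead) if v] == [1, 2]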
1097 1107 def heads(self, start=None, stop=None):
1098 1108 """return the list of all nodes that have no children
1099 1109
1100 1110 if start is specified, only heads that are descendants of
1101 1111 start will be returned
1102 1112 if stop is specified, it will consider all the revs from stop
1103 1113 as if they had no children
1104 1114 """
1105 1115 if start is None and stop is None:
1106 1116 if not len(self):
1107 1117 return [nullid]
1108 1118 return [self.node(r) for r in self.headrevs()]
1109 1119
1110 1120 if start is None:
1111 1121 start = nullrev
1112 1122 else:
1113 1123 start = self.rev(start)
1114 1124
1115 1125 stoprevs = set(self.rev(n) for n in stop or [])
1116 1126
1117 1127 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1118 1128 stoprevs=stoprevs)
1119 1129
1120 1130 return [self.node(rev) for rev in revs]
1121 1131
1122 1132 def children(self, node):
1123 1133 """find the children of a given node"""
1124 1134 c = []
1125 1135 p = self.rev(node)
1126 1136 for r in self.revs(start=p + 1):
1127 1137 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1128 1138 if prevs:
1129 1139 for pr in prevs:
1130 1140 if pr == p:
1131 1141 c.append(self.node(r))
1132 1142 elif p == nullrev:
1133 1143 c.append(self.node(r))
1134 1144 return c
1135 1145
1136 1146 def commonancestorsheads(self, a, b):
1137 1147 """calculate all the heads of the common ancestors of nodes a and b"""
1138 1148 a, b = self.rev(a), self.rev(b)
1139 1149 ancs = self._commonancestorsheads(a, b)
1140 1150 return pycompat.maplist(self.node, ancs)
1141 1151
1142 1152 def _commonancestorsheads(self, *revs):
1143 1153 """calculate all the heads of the common ancestors of revs"""
1144 1154 try:
1145 1155 ancs = self.index.commonancestorsheads(*revs)
1146 1156 except (AttributeError, OverflowError): # C implementation failed
1147 1157 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1148 1158 return ancs
1149 1159
1150 1160 def isancestor(self, a, b):
1151 1161 """return True if node a is an ancestor of node b
1152 1162
1153 1163 A revision is considered an ancestor of itself."""
1154 1164 a, b = self.rev(a), self.rev(b)
1155 1165 return self.isancestorrev(a, b)
1156 1166
1157 1167 def isancestorrev(self, a, b):
1158 1168 """return True if revision a is an ancestor of revision b
1159 1169
1160 1170 A revision is considered an ancestor of itself.
1161 1171
1162 1172 The implementation of this is trivial but the use of
1163 1173 commonancestorsheads is not."""
1164 1174 if a == nullrev:
1165 1175 return True
1166 1176 elif a == b:
1167 1177 return True
1168 1178 elif a > b:
1169 1179 return False
1170 1180 return a in self._commonancestorsheads(a, b)
1171 1181
1172 1182 def ancestor(self, a, b):
1173 1183 """calculate the "best" common ancestor of nodes a and b"""
1174 1184
1175 1185 a, b = self.rev(a), self.rev(b)
1176 1186 try:
1177 1187 ancs = self.index.ancestors(a, b)
1178 1188 except (AttributeError, OverflowError):
1179 1189 ancs = ancestor.ancestors(self.parentrevs, a, b)
1180 1190 if ancs:
1181 1191 # choose a consistent winner when there's a tie
1182 1192 return min(map(self.node, ancs))
1183 1193 return nullid
1184 1194
1185 1195 def _match(self, id):
1186 1196 if isinstance(id, int):
1187 1197 # rev
1188 1198 return self.node(id)
1189 1199 if len(id) == 20:
1190 1200 # possibly a binary node
1191 1201 # odds of a binary node being all hex in ASCII are 1 in 10**25
1192 1202 try:
1193 1203 node = id
1194 1204 self.rev(node) # quick search the index
1195 1205 return node
1196 1206 except error.LookupError:
1197 1207 pass # may be partial hex id
1198 1208 try:
1199 1209 # str(rev)
1200 1210 rev = int(id)
1201 1211 if "%d" % rev != id:
1202 1212 raise ValueError
1203 1213 if rev < 0:
1204 1214 rev = len(self) + rev
1205 1215 if rev < 0 or rev >= len(self):
1206 1216 raise ValueError
1207 1217 return self.node(rev)
1208 1218 except (ValueError, OverflowError):
1209 1219 pass
1210 1220 if len(id) == 40:
1211 1221 try:
1212 1222 # a full hex nodeid?
1213 1223 node = bin(id)
1214 1224 self.rev(node)
1215 1225 return node
1216 1226 except (TypeError, error.LookupError):
1217 1227 pass
1218 1228
1219 1229 def _partialmatch(self, id):
1220 1230 # we don't care about wdirfilenodeids as they should always be full hashes
1221 1231 maybewdir = wdirhex.startswith(id)
1222 1232 try:
1223 1233 partial = self.index.partialmatch(id)
1224 1234 if partial and self.hasnode(partial):
1225 1235 if maybewdir:
1226 1236 # single 'ff...' match in radix tree, ambiguous with wdir
1227 1237 raise error.RevlogError
1228 1238 return partial
1229 1239 if maybewdir:
1230 1240 # no 'ff...' match in radix tree, wdir identified
1231 1241 raise error.WdirUnsupported
1232 1242 return None
1233 1243 except error.RevlogError:
1234 1244 # parsers.c radix tree lookup gave multiple matches
1235 1245 # fast path: for unfiltered changelog, radix tree is accurate
1236 1246 if not getattr(self, 'filteredrevs', None):
1237 1247 raise error.AmbiguousPrefixLookupError(
1238 1248 id, self.indexfile, _('ambiguous identifier'))
1239 1249 # fall through to slow path that filters hidden revisions
1240 1250 except (AttributeError, ValueError):
1241 1251 # we are pure python, or key was too short to search radix tree
1242 1252 pass
1243 1253
1244 1254 if id in self._pcache:
1245 1255 return self._pcache[id]
1246 1256
1247 1257 if len(id) <= 40:
1248 1258 try:
1249 1259 # hex(node)[:...]
1250 1260 l = len(id) // 2 # grab an even number of digits
1251 1261 prefix = bin(id[:l * 2])
1252 1262 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1253 1263 nl = [n for n in nl if hex(n).startswith(id) and
1254 1264 self.hasnode(n)]
1255 1265 if nullhex.startswith(id):
1256 1266 nl.append(nullid)
1257 1267 if len(nl) > 0:
1258 1268 if len(nl) == 1 and not maybewdir:
1259 1269 self._pcache[id] = nl[0]
1260 1270 return nl[0]
1261 1271 raise error.AmbiguousPrefixLookupError(
1262 1272 id, self.indexfile, _('ambiguous identifier'))
1263 1273 if maybewdir:
1264 1274 raise error.WdirUnsupported
1265 1275 return None
1266 1276 except TypeError:
1267 1277 pass
1268 1278
1269 1279 def lookup(self, id):
1270 1280 """locate a node based on:
1271 1281 - revision number or str(revision number)
1272 1282 - nodeid or subset of hex nodeid
1273 1283 """
1274 1284 n = self._match(id)
1275 1285 if n is not None:
1276 1286 return n
1277 1287 n = self._partialmatch(id)
1278 1288 if n:
1279 1289 return n
1280 1290
1281 1291 raise error.LookupError(id, self.indexfile, _('no match found'))
1282 1292
1283 1293 def shortest(self, node, minlength=1):
1284 1294 """Find the shortest unambiguous prefix that matches node."""
1285 1295 def isvalid(prefix):
1286 1296 try:
1287 1297 node = self._partialmatch(prefix)
1288 1298 except error.AmbiguousPrefixLookupError:
1289 1299 return False
1290 1300 except error.WdirUnsupported:
1291 1301 # single 'ff...' match
1292 1302 return True
1293 1303 if node is None:
1294 1304 raise error.LookupError(node, self.indexfile, _('no node'))
1295 1305 return True
1296 1306
1297 1307 def maybewdir(prefix):
1298 1308 return all(c == 'f' for c in prefix)
1299 1309
1300 1310 hexnode = hex(node)
1301 1311
1302 1312 def disambiguate(hexnode, minlength):
1303 1313 """Disambiguate against wdirid."""
1304 1314 for length in range(minlength, 41):
1305 1315 prefix = hexnode[:length]
1306 1316 if not maybewdir(prefix):
1307 1317 return prefix
1308 1318
1309 1319 if not getattr(self, 'filteredrevs', None):
1310 1320 try:
1311 1321 length = max(self.index.shortest(node), minlength)
1312 1322 return disambiguate(hexnode, length)
1313 1323 except error.RevlogError:
1314 1324 if node != wdirid:
1315 1325 raise error.LookupError(node, self.indexfile, _('no node'))
1316 1326 except AttributeError:
1317 1327 # Fall through to pure code
1318 1328 pass
1319 1329
1320 1330 if node == wdirid:
1321 1331 for length in range(minlength, 41):
1322 1332 prefix = hexnode[:length]
1323 1333 if isvalid(prefix):
1324 1334 return prefix
1325 1335
1326 1336 for length in range(minlength, 41):
1327 1337 prefix = hexnode[:length]
1328 1338 if isvalid(prefix):
1329 1339 return disambiguate(hexnode, length)
1330 1340
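# Usage sketch (assumes a revlog instance `rl` that contains binary `node`):
# shortest() yields the shortest hex prefix that is unambiguous and does not
# shadow the all-'f' wdir pseudo-identifier handled above.
prefix = rl.shortest(node, minlength=1)
assert rl.lookup(prefix) == node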
1331 1341 def cmp(self, node, text):
1332 1342 """compare text with a given file revision
1333 1343
1334 1344 returns True if text is different from what is stored.
1335 1345 """
1336 1346 p1, p2 = self.parents(node)
1337 1347 return storageutil.hashrevisionsha1(text, p1, p2) != node
1338 1348
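# The node identity that cmp() checks is Mercurial's classic SHA-1 over the
# two parent nodes in sorted order followed by the text, as implemented by
# storageutil.hashrevisionsha1; a self-contained equivalent:
import hashlib

def hashrevisionsha1(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))   # smaller parent node first
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()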
1339 1349 def _cachesegment(self, offset, data):
1340 1350 """Add a segment to the revlog cache.
1341 1351
1342 1352 Accepts an absolute offset and the data that is at that location.
1343 1353 """
1344 1354 o, d = self._chunkcache
1345 1355 # try to add to existing cache
1346 1356 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1347 1357 self._chunkcache = o, d + data
1348 1358 else:
1349 1359 self._chunkcache = offset, data
1350 1360
1351 1361 def _readsegment(self, offset, length, df=None):
1352 1362 """Load a segment of raw data from the revlog.
1353 1363
1354 1364 Accepts an absolute offset, length to read, and an optional existing
1355 1365 file handle to read from.
1356 1366
1357 1367 If an existing file handle is passed, it will be seeked and the
1358 1368 original seek position will NOT be restored.
1359 1369
1360 1370 Returns a str or buffer of raw byte data.
1361 1371
1362 1372 Raises if the requested number of bytes could not be read.
1363 1373 """
1364 1374 # Cache data both forward and backward around the requested
1365 1375 # data, in a fixed size window. This helps speed up operations
1366 1376 # involving reading the revlog backwards.
1367 1377 cachesize = self._chunkcachesize
1368 1378 realoffset = offset & ~(cachesize - 1)
1369 1379 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1370 1380 - realoffset)
1371 1381 with self._datareadfp(df) as df:
1372 1382 df.seek(realoffset)
1373 1383 d = df.read(reallength)
1374 1384
1375 1385 self._cachesegment(realoffset, d)
1376 1386 if offset != realoffset or reallength != length:
1377 1387 startoffset = offset - realoffset
1378 1388 if len(d) - startoffset < length:
1379 1389 raise error.RevlogError(
1380 1390 _('partial read of revlog %s; expected %d bytes from '
1381 1391 'offset %d, got %d') %
1382 1392 (self.indexfile if self._inline else self.datafile,
1383 1393 length, realoffset, len(d) - startoffset))
1384 1394
1385 1395 return util.buffer(d, startoffset, length)
1386 1396
1387 1397 if len(d) < length:
1388 1398 raise error.RevlogError(
1389 1399 _('partial read of revlog %s; expected %d bytes from offset '
1390 1400 '%d, got %d') %
1391 1401 (self.indexfile if self._inline else self.datafile,
1392 1402 length, offset, len(d)))
1393 1403
1394 1404 return d
1395 1405
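# Worked example of the cache-window alignment in _readsegment() above, with
# the default 64 KiB window: reads are widened to window-aligned boundaries
# so that neighbouring chunks can be served from the cache.
cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)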
1396 1406 def _getsegment(self, offset, length, df=None):
1397 1407 """Obtain a segment of raw data from the revlog.
1398 1408
1399 1409 Accepts an absolute offset, length of bytes to obtain, and an
1400 1410 optional file handle to the already-opened revlog. If the file
1401 1411 handle is used, it's original seek position will not be preserved.
1402 1412
1403 1413 Requests for data may be returned from a cache.
1404 1414
1405 1415 Returns a str or a buffer instance of raw byte data.
1406 1416 """
1407 1417 o, d = self._chunkcache
1408 1418 l = len(d)
1409 1419
1410 1420 # is it in the cache?
1411 1421 cachestart = offset - o
1412 1422 cacheend = cachestart + length
1413 1423 if cachestart >= 0 and cacheend <= l:
1414 1424 if cachestart == 0 and cacheend == l:
1415 1425 return d # avoid a copy
1416 1426 return util.buffer(d, cachestart, cacheend - cachestart)
1417 1427
1418 1428 return self._readsegment(offset, length, df=df)
1419 1429
1420 1430 def _getsegmentforrevs(self, startrev, endrev, df=None):
1421 1431 """Obtain a segment of raw data corresponding to a range of revisions.
1422 1432
1423 1433 Accepts the start and end revisions and an optional already-open
1424 1434 file handle to be used for reading. If the file handle is read, its
1425 1435 seek position will not be preserved.
1426 1436
1427 1437 Requests for data may be satisfied by a cache.
1428 1438
1429 1439 Returns a 2-tuple of (offset, data) for the requested range of
1430 1440 revisions. Offset is the integer offset from the beginning of the
1431 1441 revlog and data is a str or buffer of the raw byte data.
1432 1442
1433 1443 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1434 1444 to determine where each revision's data begins and ends.
1435 1445 """
1436 1446 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1437 1447 # (functions are expensive).
1438 1448 index = self.index
1439 1449 istart = index[startrev]
1440 1450 start = int(istart[0] >> 16)
1441 1451 if startrev == endrev:
1442 1452 end = start + istart[1]
1443 1453 else:
1444 1454 iend = index[endrev]
1445 1455 end = int(iend[0] >> 16) + iend[1]
1446 1456
1447 1457 if self._inline:
1448 1458 start += (startrev + 1) * self._io.size
1449 1459 end += (endrev + 1) * self._io.size
1450 1460 length = end - start
1451 1461
1452 1462 return start, self._getsegment(start, length, df=df)
1453 1463
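# Worked example of the inline fixup above: in an inline revlog every
# revision's data chunk is preceded by its index record, so rev r's data
# really starts (r + 1) * self._io.size bytes later than the index says
# (64-byte records for the v1 format).
iosize = 64
logical_start = 120        # offset recorded in the index (data bytes only)
rev = 2
physical_start = logical_start + (rev + 1) * iosize
assert physical_start == 312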
1454 1464 def _chunk(self, rev, df=None):
1455 1465 """Obtain a single decompressed chunk for a revision.
1456 1466
1457 1467 Accepts an integer revision and an optional already-open file handle
1458 1468 to be used for reading. If used, the seek position of the file will not
1459 1469 be preserved.
1460 1470
1461 1471 Returns a str holding uncompressed data for the requested revision.
1462 1472 """
1463 1473 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1464 1474
1465 1475 def _chunks(self, revs, df=None, targetsize=None):
1466 1476 """Obtain decompressed chunks for the specified revisions.
1467 1477
1468 1478 Accepts an iterable of numeric revisions that are assumed to be in
1469 1479 ascending order. Also accepts an optional already-open file handle
1470 1480 to be used for reading. If used, the seek position of the file will
1471 1481 not be preserved.
1472 1482
1473 1483 This function is similar to calling ``self._chunk()`` multiple times,
1474 1484 but is faster.
1475 1485
1476 1486 Returns a list with decompressed data for each requested revision.
1477 1487 """
1478 1488 if not revs:
1479 1489 return []
1480 1490 start = self.start
1481 1491 length = self.length
1482 1492 inline = self._inline
1483 1493 iosize = self._io.size
1484 1494 buffer = util.buffer
1485 1495
1486 1496 l = []
1487 1497 ladd = l.append
1488 1498
1489 1499 if not self._withsparseread:
1490 1500 slicedchunks = (revs,)
1491 1501 else:
1492 1502 slicedchunks = deltautil.slicechunk(self, revs,
1493 1503 targetsize=targetsize)
1494 1504
1495 1505 for revschunk in slicedchunks:
1496 1506 firstrev = revschunk[0]
1497 1507 # Skip trailing revisions with empty diff
1498 1508 for lastrev in revschunk[::-1]:
1499 1509 if length(lastrev) != 0:
1500 1510 break
1501 1511
1502 1512 try:
1503 1513 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1504 1514 except OverflowError:
1505 1515 # issue4215 - we can't cache a run of chunks greater than
1506 1516 # 2G on Windows
1507 1517 return [self._chunk(rev, df=df) for rev in revschunk]
1508 1518
1509 1519 decomp = self.decompress
1510 1520 for rev in revschunk:
1511 1521 chunkstart = start(rev)
1512 1522 if inline:
1513 1523 chunkstart += (rev + 1) * iosize
1514 1524 chunklength = length(rev)
1515 1525 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1516 1526
1517 1527 return l
1518 1528
1519 1529 def _chunkclear(self):
1520 1530 """Clear the raw chunk cache."""
1521 1531 self._chunkcache = (0, '')
1522 1532
1523 1533 def deltaparent(self, rev):
1524 1534 """return deltaparent of the given revision"""
1525 1535 base = self.index[rev][3]
1526 1536 if base == rev:
1527 1537 return nullrev
1528 1538 elif self._generaldelta:
1529 1539 return base
1530 1540 else:
1531 1541 return rev - 1
1532 1542
1533 1543 def issnapshot(self, rev):
1534 1544 """tells whether rev is a snapshot
1535 1545 """
1536 1546 if not self._sparserevlog:
1537 1547 return self.deltaparent(rev) == nullrev
1538 1548 elif util.safehasattr(self.index, 'issnapshot'):
1539 1549 # directly assign the method to cache the testing and access
1540 1550 self.issnapshot = self.index.issnapshot
1541 1551 return self.issnapshot(rev)
1542 1552 if rev == nullrev:
1543 1553 return True
1544 1554 entry = self.index[rev]
1545 1555 base = entry[3]
1546 1556 if base == rev:
1547 1557 return True
1548 1558 if base == nullrev:
1549 1559 return True
1550 1560 p1 = entry[5]
1551 1561 p2 = entry[6]
1552 1562 if base == p1 or base == p2:
1553 1563 return False
1554 1564 return self.issnapshot(base)
1555 1565
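# Toy walk of the pure-Python fallback in issnapshot() above, over a
# hypothetical index of (base, p1, p2) triples:
meta = {0: (0, -1, -1),    # its own base: a full snapshot
        1: (0, 0, -1),     # delta against its parent: not a snapshot
        2: (0, 1, -1)}     # delta against a non-parent: intermediate snapshot

def issnapshot(rev):
    if rev == -1:
        return True
    base, p1, p2 = meta[rev]
    if base == rev or base == -1:
        return True
    if base in (p1, p2):
        return False
    return issnapshot(base)

assert issnapshot(0) and not issnapshot(1) and issnapshot(2)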
1556 1566 def snapshotdepth(self, rev):
1557 1567 """number of snapshot in the chain before this one"""
1558 1568 if not self.issnapshot(rev):
1559 1569             raise error.ProgrammingError('revision %d not a snapshot' % rev)
1560 1570 return len(self._deltachain(rev)[0]) - 1
1561 1571
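    # Rough equivalent, for illustration only: under generaldelta,
    # ``snapshotdepth`` amounts to counting delta-parent hops back to the
    # full snapshot, assuming ``rev`` is itself a snapshot:
    #
    #   depth = 0
    #   base = rl.deltaparent(rev)
    #   while base != nullrev:
    #       depth += 1
    #       base = rl.deltaparent(base)
    #   # depth == rl.snapshotdepth(rev)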
1562 1572 def revdiff(self, rev1, rev2):
1563 1573 """return or calculate a delta between two revisions
1564 1574
1565 1575 The delta calculated is in binary form and is intended to be written to
1566 1576 revlog data directly. So this function needs raw revision data.
1567 1577 """
1568 1578 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1569 1579 return bytes(self._chunk(rev2))
1570 1580
1571 1581 return mdiff.textdiff(self.revision(rev1, raw=True),
1572 1582 self.revision(rev2, raw=True))
1573 1583
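    # Example (illustrative, assuming ``rl`` holds at least two revisions):
    # the delta returned by ``revdiff`` patches cleanly onto the raw text of
    # the first revision:
    #
    #   delta = rl.revdiff(0, 1)
    #   assert mdiff.patch(rl.revision(0, raw=True), delta) == \
    #          rl.revision(1, raw=True)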
1574 1584 def revision(self, nodeorrev, _df=None, raw=False):
1575 1585 """return an uncompressed revision of a given node or revision
1576 1586 number.
1577 1587
1578 1588 _df - an existing file handle to read from. (internal-only)
1579 1589 raw - an optional argument specifying if the revision data is to be
1580 1590 treated as raw data when applying flag transforms. 'raw' should be set
1581 1591 to True when generating changegroups or in debug commands.
1582 1592 """
1583 1593 if isinstance(nodeorrev, int):
1584 1594 rev = nodeorrev
1585 1595 node = self.node(rev)
1586 1596 else:
1587 1597 node = nodeorrev
1588 1598 rev = None
1589 1599
1590 1600 cachedrev = None
1591 1601 flags = None
1592 1602 rawtext = None
1593 1603 if node == nullid:
1594 1604 return ""
1595 1605 if self._revisioncache:
1596 1606 if self._revisioncache[0] == node:
1597 1607 # _cache only stores rawtext
1598 1608 if raw:
1599 1609 return self._revisioncache[2]
1600 1610 # duplicated, but good for perf
1601 1611 if rev is None:
1602 1612 rev = self.rev(node)
1603 1613 if flags is None:
1604 1614 flags = self.flags(rev)
1605 1615 # no extra flags set, no flag processor runs, text = rawtext
1606 1616 if flags == REVIDX_DEFAULT_FLAGS:
1607 1617 return self._revisioncache[2]
1608 1618 # rawtext is reusable. need to run flag processor
1609 1619 rawtext = self._revisioncache[2]
1610 1620
1611 1621 cachedrev = self._revisioncache[1]
1612 1622
1613 1623 # look up what we need to read
1614 1624 if rawtext is None:
1615 1625 if rev is None:
1616 1626 rev = self.rev(node)
1617 1627
1618 1628 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1619 1629 if stopped:
1620 1630 rawtext = self._revisioncache[2]
1621 1631
1622 1632 # drop cache to save memory
1623 1633 self._revisioncache = None
1624 1634
1625 1635 targetsize = None
1626 1636 rawsize = self.index[rev][2]
1627 1637 if 0 <= rawsize:
1628 1638 targetsize = 4 * rawsize
1629 1639
1630 1640 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1631 1641 if rawtext is None:
1632 1642 rawtext = bytes(bins[0])
1633 1643 bins = bins[1:]
1634 1644
1635 1645 rawtext = mdiff.patches(rawtext, bins)
1636 1646 self._revisioncache = (node, rev, rawtext)
1637 1647
1638 1648 if flags is None:
1639 1649 if rev is None:
1640 1650 rev = self.rev(node)
1641 1651 flags = self.flags(rev)
1642 1652
1643 1653 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1644 1654 if validatehash:
1645 1655 self.checkhash(text, node, rev=rev)
1646 1656
1647 1657 return text
1648 1658
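    # Usage note (illustrative): callers may pass either a node or a rev
    # number. ``raw=True`` skips flag processors and returns the stored
    # rawtext, which is what changegroup generation and debug commands want:
    #
    #   text = rl.revision(node)               # cooked text, hash-checked
    #   rawtext = rl.revision(rev, raw=True)   # bytes as stored on disk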
1649 1659 def hash(self, text, p1, p2):
1650 1660 """Compute a node hash.
1651 1661
1652 1662 Available as a function so that subclasses can replace the hash
1653 1663 as needed.
1654 1664 """
1655 1665 return storageutil.hashrevisionsha1(text, p1, p2)
1656 1666
1657 1667 def _processflags(self, text, flags, operation, raw=False):
1658 1668 """Inspect revision data flags and applies transforms defined by
1659 1669 registered flag processors.
1660 1670
1661 1671 ``text`` - the revision data to process
1662 1672 ``flags`` - the revision flags
1663 1673 ``operation`` - the operation being performed (read or write)
1664 1674 ``raw`` - an optional argument describing if the raw transform should be
1665 1675 applied.
1666 1676
1667 1677 This method processes the flags in the order (or reverse order if
1668 1678 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1669 1679         flag processors registered for present flags. Flag transforms need not
1670 1680         be commutative, so the order defined in REVIDX_FLAGS_ORDER must be stable.
1671 1681
1672 1682 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1673 1683 processed text and ``validatehash`` is a bool indicating whether the
1674 1684 returned text should be checked for hash integrity.
1675 1685
1676 1686 Note: If the ``raw`` argument is set, it has precedence over the
1677 1687 operation and will only update the value of ``validatehash``.
1678 1688 """
1679 1689 # fast path: no flag processors will run
1680 1690 if flags == 0:
1681 1691 return text, True
1682 1692         if operation not in ('read', 'write'):
1683 1693 raise error.ProgrammingError(_("invalid '%s' operation") %
1684 1694 operation)
1685 1695 # Check all flags are known.
1686 1696 if flags & ~REVIDX_KNOWN_FLAGS:
1687 1697 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1688 1698 (flags & ~REVIDX_KNOWN_FLAGS))
1689 1699 validatehash = True
1690 1700 # Depending on the operation (read or write), the order might be
1691 1701 # reversed due to non-commutative transforms.
1692 1702 orderedflags = REVIDX_FLAGS_ORDER
1693 1703 if operation == 'write':
1694 1704 orderedflags = reversed(orderedflags)
1695 1705
1696 1706 for flag in orderedflags:
1697 1707 # If a flagprocessor has been registered for a known flag, apply the
1698 1708 # related operation transform and update result tuple.
1699 1709 if flag & flags:
1700 1710 vhash = True
1701 1711
1702 1712 if flag not in self._flagprocessors:
1703 1713 message = _("missing processor for flag '%#x'") % (flag)
1704 1714 raise error.RevlogError(message)
1705 1715
1706 1716 processor = self._flagprocessors[flag]
1707 1717 if processor is not None:
1708 1718 readtransform, writetransform, rawtransform = processor
1709 1719
1710 1720 if raw:
1711 1721 vhash = rawtransform(self, text)
1712 1722 elif operation == 'read':
1713 1723 text, vhash = readtransform(self, text)
1714 1724 else: # write operation
1715 1725 text, vhash = writetransform(self, text)
1716 1726 validatehash = validatehash and vhash
1717 1727
1718 1728 return text, validatehash
1719 1729
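    # Sketch of a flag processor (hypothetical, for illustration): the
    # triple registered via ``addflagprocessor`` must provide read and write
    # transforms returning (newtext, validatehash) plus a raw transform
    # returning only the validatehash boolean, matching the dispatch above:
    #
    #   def _extread(rl, text):
    #       return zlib.decompress(text), True
    #   def _extwrite(rl, text):
    #       return zlib.compress(text), False
    #   def _extraw(rl, text):
    #       return False
    #   addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))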
1720 1730 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1721 1731 """Check node hash integrity.
1722 1732
1723 1733 Available as a function so that subclasses can extend hash mismatch
1724 1734 behaviors as needed.
1725 1735 """
1726 1736 try:
1727 1737 if p1 is None and p2 is None:
1728 1738 p1, p2 = self.parents(node)
1729 1739 if node != self.hash(text, p1, p2):
1730 1740 # Clear the revision cache on hash failure. The revision cache
1731 1741 # only stores the raw revision and clearing the cache does have
1732 1742 # the side-effect that we won't have a cache hit when the raw
1733 1743 # revision data is accessed. But this case should be rare and
1734 1744 # it is extra work to teach the cache about the hash
1735 1745 # verification state.
1736 1746 if self._revisioncache and self._revisioncache[0] == node:
1737 1747 self._revisioncache = None
1738 1748
1739 1749 revornode = rev
1740 1750 if revornode is None:
1741 1751 revornode = templatefilters.short(hex(node))
1742 1752 raise error.RevlogError(_("integrity check failed on %s:%s")
1743 1753 % (self.indexfile, pycompat.bytestr(revornode)))
1744 1754 except error.RevlogError:
1745 1755 if self._censorable and storageutil.iscensoredtext(text):
1746 1756 raise error.CensoredNodeError(self.indexfile, node, text)
1747 1757 raise
1748 1758
1749 1759 def _enforceinlinesize(self, tr, fp=None):
1750 1760 """Check if the revlog is too big for inline and convert if so.
1751 1761
1752 1762 This should be called after revisions are added to the revlog. If the
1753 1763 revlog has grown too large to be an inline revlog, it will convert it
1754 1764 to use multiple index and data files.
1755 1765 """
1756 1766 tiprev = len(self) - 1
1757 1767 if (not self._inline or
1758 1768 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1759 1769 return
1760 1770
1761 1771 trinfo = tr.find(self.indexfile)
1762 1772 if trinfo is None:
1763 1773 raise error.RevlogError(_("%s not found in the transaction")
1764 1774 % self.indexfile)
1765 1775
1766 1776 trindex = trinfo[2]
1767 1777 if trindex is not None:
1768 1778 dataoff = self.start(trindex)
1769 1779 else:
1770 1780 # revlog was stripped at start of transaction, use all leftover data
1771 1781 trindex = len(self) - 1
1772 1782 dataoff = self.end(tiprev)
1773 1783
1774 1784 tr.add(self.datafile, dataoff)
1775 1785
1776 1786 if fp:
1777 1787 fp.flush()
1778 1788 fp.close()
1779 1789 # We can't use the cached file handle after close(). So prevent
1780 1790 # its usage.
1781 1791 self._writinghandles = None
1782 1792
1783 1793 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1784 1794 for r in self:
1785 1795 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1786 1796
1787 1797 with self._indexfp('w') as fp:
1788 1798 self.version &= ~FLAG_INLINE_DATA
1789 1799 self._inline = False
1790 1800 io = self._io
1791 1801 for i in self:
1792 1802 e = io.packentry(self.index[i], self.node, self.version, i)
1793 1803 fp.write(e)
1794 1804
1795 1805         # the temp file replaces the real index when we exit the context
1796 1806 # manager
1797 1807
1798 1808 tr.replace(self.indexfile, trindex * self._io.size)
1799 1809 self._chunkclear()
1800 1810
1801 1811 def _nodeduplicatecallback(self, transaction, node):
1802 1812         """called when trying to add a node already stored."""
1804 1814
1805 1815 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1806 1816 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1807 1817 """add a revision to the log
1808 1818
1809 1819 text - the revision data to add
1810 1820 transaction - the transaction object used for rollback
1811 1821 link - the linkrev data to add
1812 1822 p1, p2 - the parent nodeids of the revision
1813 1823 cachedelta - an optional precomputed delta
1814 1824 node - nodeid of revision; typically node is not specified, and it is
1815 1825 computed by default as hash(text, p1, p2), however subclasses might
1816 1826         use a different hashing method (and override checkhash() in that case)
1817 1827 flags - the known flags to set on the revision
1818 1828 deltacomputer - an optional deltacomputer instance shared between
1819 1829 multiple calls
1820 1830 """
1821 1831 if link == nullrev:
1822 1832 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1823 1833 % self.indexfile)
1824 1834
1825 1835 if flags:
1826 1836 node = node or self.hash(text, p1, p2)
1827 1837
1828 1838 rawtext, validatehash = self._processflags(text, flags, 'write')
1829 1839
1830 1840 # If the flag processor modifies the revision data, ignore any provided
1831 1841 # cachedelta.
1832 1842 if rawtext != text:
1833 1843 cachedelta = None
1834 1844
1835 1845 if len(rawtext) > _maxentrysize:
1836 1846 raise error.RevlogError(
1837 1847 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1838 1848 % (self.indexfile, len(rawtext)))
1839 1849
1840 1850 node = node or self.hash(rawtext, p1, p2)
1841 1851 if node in self.nodemap:
1842 1852 return node
1843 1853
1844 1854 if validatehash:
1845 1855 self.checkhash(rawtext, node, p1=p1, p2=p2)
1846 1856
1847 1857 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1848 1858 flags, cachedelta=cachedelta,
1849 1859 deltacomputer=deltacomputer)
1850 1860
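    # Illustrative call (assumed names ``repo``, ``rl``, ``linkrev``,
    # ``p1node`` and ``p2node``): adding text inside a transaction so the
    # write can be rolled back:
    #
    #   with repo.transaction(b'example') as tr:
    #       node = rl.addrevision(b'new text', tr, linkrev, p1node, p2node)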
1851 1861 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1852 1862 cachedelta=None, deltacomputer=None):
1853 1863 """add a raw revision with known flags, node and parents
1854 1864         useful when reusing a revision not stored in this revlog (e.g. received
1855 1865         over the wire, or read from an external bundle).
1856 1866 """
1857 1867 dfh = None
1858 1868 if not self._inline:
1859 1869 dfh = self._datafp("a+")
1860 1870 ifh = self._indexfp("a+")
1861 1871 try:
1862 1872 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1863 1873 flags, cachedelta, ifh, dfh,
1864 1874 deltacomputer=deltacomputer)
1865 1875 finally:
1866 1876 if dfh:
1867 1877 dfh.close()
1868 1878 ifh.close()
1869 1879
1870 1880 def compress(self, data):
1871 1881 """Generate a possibly-compressed representation of data."""
1872 1882 if not data:
1873 1883 return '', data
1874 1884
1875 1885 compressed = self._compressor.compress(data)
1876 1886
1877 1887 if compressed:
1878 1888 # The revlog compressor added the header in the returned data.
1879 1889 return '', compressed
1880 1890
1881 1891 if data[0:1] == '\0':
1882 1892 return '', data
1883 1893 return 'u', data
1884 1894
1885 1895 def decompress(self, data):
1886 1896 """Decompress a revlog chunk.
1887 1897
1888 1898 The chunk is expected to begin with a header identifying the
1889 1899 format type so it can be routed to an appropriate decompressor.
1890 1900 """
1891 1901 if not data:
1892 1902 return data
1893 1903
1894 1904 # Revlogs are read much more frequently than they are written and many
1895 1905 # chunks only take microseconds to decompress, so performance is
1896 1906 # important here.
1897 1907 #
1898 1908 # We can make a few assumptions about revlogs:
1899 1909 #
1900 1910 # 1) the majority of chunks will be compressed (as opposed to inline
1901 1911 # raw data).
1902 1912         # 2) decompressing *any* data will likely be at least 10x slower than
1903 1913 # returning raw inline data.
1904 1914 # 3) we want to prioritize common and officially supported compression
1905 1915 # engines
1906 1916 #
1907 1917 # It follows that we want to optimize for "decompress compressed data
1908 1918 # when encoded with common and officially supported compression engines"
1909 1919 # case over "raw data" and "data encoded by less common or non-official
1910 1920 # compression engines." That is why we have the inline lookup first
1911 1921 # followed by the compengines lookup.
1912 1922 #
1913 1923 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1914 1924 # compressed chunks. And this matters for changelog and manifest reads.
1915 1925 t = data[0:1]
1916 1926
1917 1927 if t == 'x':
1918 1928 try:
1919 1929 return _zlibdecompress(data)
1920 1930 except zlib.error as e:
1921 1931 raise error.RevlogError(_('revlog decompress error: %s') %
1922 1932 stringutil.forcebytestr(e))
1923 1933 # '\0' is more common than 'u' so it goes first.
1924 1934 elif t == '\0':
1925 1935 return data
1926 1936 elif t == 'u':
1927 1937 return util.buffer(data, 1)
1928 1938
1929 1939 try:
1930 1940 compressor = self._decompressors[t]
1931 1941 except KeyError:
1932 1942 try:
1933 1943 engine = util.compengines.forrevlogheader(t)
1934 1944 compressor = engine.revlogcompressor()
1935 1945 self._decompressors[t] = compressor
1936 1946 except KeyError:
1937 1947 raise error.RevlogError(_('unknown compression type %r') % t)
1938 1948
1939 1949 return compressor.decompress(data)
1940 1950
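    # Round-trip sketch (illustrative): the header byte emitted by
    # ``compress`` above is exactly what ``decompress`` dispatches on, so
    # for any chunk:
    #
    #   header, packed = rl.compress(rawchunk)
    #   assert bytes(rl.decompress(header + packed)) == rawchunk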
1941 1951 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1942 1952 cachedelta, ifh, dfh, alwayscache=False,
1943 1953 deltacomputer=None):
1944 1954 """internal function to add revisions to the log
1945 1955
1946 1956 see addrevision for argument descriptions.
1947 1957
1948 1958 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1949 1959
1950 1960 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1951 1961 be used.
1952 1962
1953 1963 invariants:
1954 1964 - rawtext is optional (can be None); if not set, cachedelta must be set.
1955 1965 if both are set, they must correspond to each other.
1956 1966 """
1957 1967 if node == nullid:
1958 1968 raise error.RevlogError(_("%s: attempt to add null revision") %
1959 1969 self.indexfile)
1960 1970 if node == wdirid or node in wdirfilenodeids:
1961 1971 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1962 1972 self.indexfile)
1963 1973
1964 1974 if self._inline:
1965 1975 fh = ifh
1966 1976 else:
1967 1977 fh = dfh
1968 1978
1969 1979 btext = [rawtext]
1970 1980
1971 1981 curr = len(self)
1972 1982 prev = curr - 1
1973 1983 offset = self.end(prev)
1974 1984 p1r, p2r = self.rev(p1), self.rev(p2)
1975 1985
1976 1986 # full versions are inserted when the needed deltas
1977 1987 # become comparable to the uncompressed text
1978 1988 if rawtext is None:
1979 1989             # need the rawtext size before it is changed by flag processors,
1980 1990             # i.e. the non-raw size. use revlog explicitly to avoid filelog's extra
1981 1991 # logic that might remove metadata size.
1982 1992 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1983 1993 cachedelta[1])
1984 1994 else:
1985 1995 textlen = len(rawtext)
1986 1996
1987 1997 if deltacomputer is None:
1988 1998 deltacomputer = deltautil.deltacomputer(self)
1989 1999
1990 2000 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1991 2001
1992 2002 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1993 2003
1994 2004 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1995 2005 deltainfo.base, link, p1r, p2r, node)
1996 2006 self.index.append(e)
1997 2007 self.nodemap[node] = curr
1998 2008
1999 2009 # Reset the pure node cache start lookup offset to account for new
2000 2010 # revision.
2001 2011 if self._nodepos is not None:
2002 2012 self._nodepos = curr
2003 2013
2004 2014 entry = self._io.packentry(e, self.node, self.version, curr)
2005 2015 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2006 2016 link, offset)
2007 2017
2008 2018 rawtext = btext[0]
2009 2019
2010 2020 if alwayscache and rawtext is None:
2011 2021 rawtext = deltacomputer.buildtext(revinfo, fh)
2012 2022
2013 2023 if type(rawtext) == bytes: # only accept immutable objects
2014 2024 self._revisioncache = (node, curr, rawtext)
2015 2025 self._chainbasecache[curr] = deltainfo.chainbase
2016 2026 return node
2017 2027
2018 2028 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2019 2029 # Files opened in a+ mode have inconsistent behavior on various
2020 2030 # platforms. Windows requires that a file positioning call be made
2021 2031 # when the file handle transitions between reads and writes. See
2022 2032 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2023 2033 # platforms, Python or the platform itself can be buggy. Some versions
2024 2034 # of Solaris have been observed to not append at the end of the file
2025 2035 # if the file was seeked to before the end. See issue4943 for more.
2026 2036 #
2027 2037 # We work around this issue by inserting a seek() before writing.
2028 2038 # Note: This is likely not necessary on Python 3. However, because
2029 2039 # the file handle is reused for reads and may be seeked there, we need
2030 2040 # to be careful before changing this.
2031 2041 ifh.seek(0, os.SEEK_END)
2032 2042 if dfh:
2033 2043 dfh.seek(0, os.SEEK_END)
2034 2044
2035 2045 curr = len(self) - 1
2036 2046 if not self._inline:
2037 2047 transaction.add(self.datafile, offset)
2038 2048 transaction.add(self.indexfile, curr * len(entry))
2039 2049 if data[0]:
2040 2050 dfh.write(data[0])
2041 2051 dfh.write(data[1])
2042 2052 ifh.write(entry)
2043 2053 else:
2044 2054 offset += curr * self._io.size
2045 2055 transaction.add(self.indexfile, offset, curr)
2046 2056 ifh.write(entry)
2047 2057 ifh.write(data[0])
2048 2058 ifh.write(data[1])
2049 2059 self._enforceinlinesize(transaction, ifh)
2050 2060
2051 2061 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2052 2062 """
2053 2063 add a delta group
2054 2064
2055 2065         given a set of deltas, add them to the revision log. The
2056 2066         first delta is against its parent, which should be in our
2057 2067         log; the rest are against the previous delta.
2058 2068
2059 2069 If ``addrevisioncb`` is defined, it will be called with arguments of
2060 2070 this revlog and the node that was added.
2061 2071 """
2062 2072
2063 2073 if self._writinghandles:
2064 2074 raise error.ProgrammingError('cannot nest addgroup() calls')
2065 2075
2066 2076 nodes = []
2067 2077
2068 2078 r = len(self)
2069 2079 end = 0
2070 2080 if r:
2071 2081 end = self.end(r - 1)
2072 2082 ifh = self._indexfp("a+")
2073 2083 isize = r * self._io.size
2074 2084 if self._inline:
2075 2085 transaction.add(self.indexfile, end + isize, r)
2076 2086 dfh = None
2077 2087 else:
2078 2088 transaction.add(self.indexfile, isize, r)
2079 2089 transaction.add(self.datafile, end)
2080 2090 dfh = self._datafp("a+")
2081 2091 def flush():
2082 2092 if dfh:
2083 2093 dfh.flush()
2084 2094 ifh.flush()
2085 2095
2086 2096 self._writinghandles = (ifh, dfh)
2087 2097
2088 2098 try:
2089 2099 deltacomputer = deltautil.deltacomputer(self)
2090 2100 # loop through our set of deltas
2091 2101 for data in deltas:
2092 2102 node, p1, p2, linknode, deltabase, delta, flags = data
2093 2103 link = linkmapper(linknode)
2094 2104 flags = flags or REVIDX_DEFAULT_FLAGS
2095 2105
2096 2106 nodes.append(node)
2097 2107
2098 2108 if node in self.nodemap:
2099 2109 self._nodeduplicatecallback(transaction, node)
2100 2110 # this can happen if two branches make the same change
2101 2111 continue
2102 2112
2103 2113 for p in (p1, p2):
2104 2114 if p not in self.nodemap:
2105 2115 raise error.LookupError(p, self.indexfile,
2106 2116 _('unknown parent'))
2107 2117
2108 2118 if deltabase not in self.nodemap:
2109 2119 raise error.LookupError(deltabase, self.indexfile,
2110 2120 _('unknown delta base'))
2111 2121
2112 2122 baserev = self.rev(deltabase)
2113 2123
2114 2124 if baserev != nullrev and self.iscensored(baserev):
2115 2125 # if base is censored, delta must be full replacement in a
2116 2126 # single patch operation
2117 2127 hlen = struct.calcsize(">lll")
2118 2128 oldlen = self.rawsize(baserev)
2119 2129 newlen = len(delta) - hlen
2120 2130 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2121 2131 raise error.CensoredBaseError(self.indexfile,
2122 2132 self.node(baserev))
2123 2133
2124 2134 if not flags and self._peek_iscensored(baserev, delta, flush):
2125 2135 flags |= REVIDX_ISCENSORED
2126 2136
2127 2137 # We assume consumers of addrevisioncb will want to retrieve
2128 2138 # the added revision, which will require a call to
2129 2139 # revision(). revision() will fast path if there is a cache
2130 2140 # hit. So, we tell _addrevision() to always cache in this case.
2131 2141 # We're only using addgroup() in the context of changegroup
2132 2142 # generation so the revision data can always be handled as raw
2133 2143 # by the flagprocessor.
2134 2144 self._addrevision(node, None, transaction, link,
2135 2145 p1, p2, flags, (baserev, delta),
2136 2146 ifh, dfh,
2137 2147 alwayscache=bool(addrevisioncb),
2138 2148 deltacomputer=deltacomputer)
2139 2149
2140 2150 if addrevisioncb:
2141 2151 addrevisioncb(self, node)
2142 2152
2143 2153 if not dfh and not self._inline:
2144 2154 # addrevision switched from inline to conventional
2145 2155 # reopen the index
2146 2156 ifh.close()
2147 2157 dfh = self._datafp("a+")
2148 2158 ifh = self._indexfp("a+")
2149 2159 self._writinghandles = (ifh, dfh)
2150 2160 finally:
2151 2161 self._writinghandles = None
2152 2162
2153 2163 if dfh:
2154 2164 dfh.close()
2155 2165 ifh.close()
2156 2166
2157 2167 return nodes
2158 2168
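    # Illustrative consumer (hypothetical names): changegroup application
    # feeds ``addgroup`` an iterable of 7-tuples shaped like the unpacking
    # above, plus a mapper from link nodes to changelog revs:
    #
    #   def linkmapper(linknode):
    #       return cl.rev(linknode)   # ``cl`` being the changelog
    #   added = rl.addgroup(deltaiter, linkmapper, tr)
    #
    # where ``deltaiter`` yields
    # (node, p1, p2, linknode, deltabase, delta, flags) tuples.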
2159 2169 def iscensored(self, rev):
2160 2170 """Check if a file revision is censored."""
2161 2171 if not self._censorable:
2162 2172 return False
2163 2173
2164 2174 return self.flags(rev) & REVIDX_ISCENSORED
2165 2175
2166 2176 def _peek_iscensored(self, baserev, delta, flush):
2167 2177 """Quickly check if a delta produces a censored revision."""
2168 2178 if not self._censorable:
2169 2179 return False
2170 2180
2171 2181 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2172 2182
2173 2183 def getstrippoint(self, minlink):
2174 2184 """find the minimum rev that must be stripped to strip the linkrev
2175 2185
2176 2186 Returns a tuple containing the minimum rev and a set of all revs that
2177 2187 have linkrevs that will be broken by this strip.
2178 2188 """
2179 2189 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2180 2190 self.headrevs(),
2181 2191 self.linkrev, self.parentrevs)
2182 2192
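    # Example (illustrative): callers such as ``strip`` below obtain both
    # the truncation point and the revs whose linkrevs the strip will
    # invalidate:
    #
    #   rev, brokenset = rl.getstrippoint(minlink)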
2183 2193 def strip(self, minlink, transaction):
2184 2194 """truncate the revlog on the first revision with a linkrev >= minlink
2185 2195
2186 2196 This function is called when we're stripping revision minlink and
2187 2197 its descendants from the repository.
2188 2198
2189 2199 We have to remove all revisions with linkrev >= minlink, because
2190 2200 the equivalent changelog revisions will be renumbered after the
2191 2201 strip.
2192 2202
2193 2203 So we truncate the revlog on the first of these revisions, and
2194 2204 trust that the caller has saved the revisions that shouldn't be
2195 2205 removed and that it'll re-add them after this truncation.
2196 2206 """
2197 2207 if len(self) == 0:
2198 2208 return
2199 2209
2200 2210 rev, _ = self.getstrippoint(minlink)
2201 2211 if rev == len(self):
2202 2212 return
2203 2213
2204 2214 # first truncate the files on disk
2205 2215 end = self.start(rev)
2206 2216 if not self._inline:
2207 2217 transaction.add(self.datafile, end)
2208 2218 end = rev * self._io.size
2209 2219 else:
2210 2220 end += rev * self._io.size
2211 2221
2212 2222 transaction.add(self.indexfile, end)
2213 2223
2214 2224 # then reset internal state in memory to forget those revisions
2215 2225 self._revisioncache = None
2216 2226 self._chaininfocache = {}
2217 2227 self._chunkclear()
2218 2228 for x in pycompat.xrange(rev, len(self)):
2219 2229 del self.nodemap[self.node(x)]
2220 2230
2221 2231 del self.index[rev:-1]
2222 2232 self._nodepos = None
2223 2233
2224 2234 def checksize(self):
2225 2235 expected = 0
2226 2236 if len(self):
2227 2237 expected = max(0, self.end(len(self) - 1))
2228 2238
2229 2239 try:
2230 2240 with self._datafp() as f:
2231 2241 f.seek(0, 2)
2232 2242 actual = f.tell()
2233 2243 dd = actual - expected
2234 2244 except IOError as inst:
2235 2245 if inst.errno != errno.ENOENT:
2236 2246 raise
2237 2247 dd = 0
2238 2248
2239 2249 try:
2240 2250 f = self.opener(self.indexfile)
2241 2251 f.seek(0, 2)
2242 2252 actual = f.tell()
2243 2253 f.close()
2244 2254 s = self._io.size
2245 2255 i = max(0, actual // s)
2246 2256 di = actual - (i * s)
2247 2257 if self._inline:
2248 2258 databytes = 0
2249 2259 for r in self:
2250 2260 databytes += max(0, self.length(r))
2251 2261 dd = 0
2252 2262 di = actual - len(self) * s - databytes
2253 2263 except IOError as inst:
2254 2264 if inst.errno != errno.ENOENT:
2255 2265 raise
2256 2266 di = 0
2257 2267
2258 2268 return (dd, di)
2259 2269
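    # Interpreting the return value (illustrative, per the code above):
    #
    #   dd, di = rl.checksize()
    #   # dd: bytes by which the data file differs from what the index
    #   #     expects (0 when consistent)
    #   # di: stray bytes at the end of the index file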
2260 2270 def files(self):
2261 2271 res = [self.indexfile]
2262 2272 if not self._inline:
2263 2273 res.append(self.datafile)
2264 2274 return res
2265 2275
2266 2276 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2267 2277 assumehaveparentrevisions=False,
2268 2278 deltamode=repository.CG_DELTAMODE_STD):
2269 2279 if nodesorder not in ('nodes', 'storage', 'linear', None):
2270 2280 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2271 2281 nodesorder)
2272 2282
2273 2283 if nodesorder is None and not self._generaldelta:
2274 2284 nodesorder = 'storage'
2275 2285
2276 2286 if (not self._storedeltachains and
2277 2287 deltamode != repository.CG_DELTAMODE_PREV):
2278 2288 deltamode = repository.CG_DELTAMODE_FULL
2279 2289
2280 2290 return storageutil.emitrevisions(
2281 2291 self, nodes, nodesorder, revlogrevisiondelta,
2282 2292 deltaparentfn=self.deltaparent,
2283 2293 candeltafn=self.candelta,
2284 2294 rawsizefn=self.rawsize,
2285 2295 revdifffn=self.revdiff,
2286 2296 flagsfn=self.flags,
2287 2297 deltamode=deltamode,
2288 2298 revisiondata=revisiondata,
2289 2299 assumehaveparentrevisions=assumehaveparentrevisions)
2290 2300
2291 2301 DELTAREUSEALWAYS = 'always'
2292 2302 DELTAREUSESAMEREVS = 'samerevs'
2293 2303 DELTAREUSENEVER = 'never'
2294 2304
2295 2305 DELTAREUSEFULLADD = 'fulladd'
2296 2306
2297 2307 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2298 2308
2299 2309 def clone(self, tr, destrevlog, addrevisioncb=None,
2300 2310 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2301 2311 """Copy this revlog to another, possibly with format changes.
2302 2312
2303 2313 The destination revlog will contain the same revisions and nodes.
2304 2314 However, it may not be bit-for-bit identical due to e.g. delta encoding
2305 2315 differences.
2306 2316
2307 2317         The ``deltareuse`` argument controls how deltas from the existing revlog
2308 2318 are preserved in the destination revlog. The argument can have the
2309 2319 following values:
2310 2320
2311 2321 DELTAREUSEALWAYS
2312 2322 Deltas will always be reused (if possible), even if the destination
2313 2323 revlog would not select the same revisions for the delta. This is the
2314 2324 fastest mode of operation.
2315 2325 DELTAREUSESAMEREVS
2316 2326 Deltas will be reused if the destination revlog would pick the same
2317 2327 revisions for the delta. This mode strikes a balance between speed
2318 2328 and optimization.
2319 2329 DELTAREUSENEVER
2320 2330 Deltas will never be reused. This is the slowest mode of execution.
2321 2331 This mode can be used to recompute deltas (e.g. if the diff/delta
2322 2332 algorithm changes).
2323 2333
2324 2334 Delta computation can be slow, so the choice of delta reuse policy can
2325 2335 significantly affect run time.
2326 2336
2327 2337 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2328 2338         two extremes. Deltas will be reused if they are appropriate. But if a
2329 2339         better delta base is available, it will be chosen. This means if you
2330 2340 are converting a non-generaldelta revlog to a generaldelta revlog,
2331 2341 deltas will be recomputed if the delta's parent isn't a parent of the
2332 2342 revision.
2333 2343
2334 2344 In addition to the delta policy, the ``forcedeltabothparents``
2335 2345         argument controls whether to force computing deltas against both parents
2336 2346         for merges. By default, the destination revlog's current setting is used.
2337 2347 """
2338 2348 if deltareuse not in self.DELTAREUSEALL:
2339 2349 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2340 2350
2341 2351 if len(destrevlog):
2342 2352 raise ValueError(_('destination revlog is not empty'))
2343 2353
2344 2354 if getattr(self, 'filteredrevs', None):
2345 2355 raise ValueError(_('source revlog has filtered revisions'))
2346 2356 if getattr(destrevlog, 'filteredrevs', None):
2347 2357 raise ValueError(_('destination revlog has filtered revisions'))
2348 2358
2349 2359 # lazydeltabase controls whether to reuse a cached delta, if possible.
2350 2360 oldlazydeltabase = destrevlog._lazydeltabase
2351 2361 oldamd = destrevlog._deltabothparents
2352 2362
2353 2363 try:
2354 2364 if deltareuse == self.DELTAREUSEALWAYS:
2355 2365 destrevlog._lazydeltabase = True
2356 2366 elif deltareuse == self.DELTAREUSESAMEREVS:
2357 2367 destrevlog._lazydeltabase = False
2358 2368
2359 2369 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2360 2370
2361 2371 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2362 2372 self.DELTAREUSESAMEREVS)
2363 2373
2364 2374 deltacomputer = deltautil.deltacomputer(destrevlog)
2365 2375 index = self.index
2366 2376 for rev in self:
2367 2377 entry = index[rev]
2368 2378
2369 2379 # Some classes override linkrev to take filtered revs into
2370 2380 # account. Use raw entry from index.
2371 2381 flags = entry[0] & 0xffff
2372 2382 linkrev = entry[4]
2373 2383 p1 = index[entry[5]][7]
2374 2384 p2 = index[entry[6]][7]
2375 2385 node = entry[7]
2376 2386
2377 2387 # (Possibly) reuse the delta from the revlog if allowed and
2378 2388 # the revlog chunk is a delta.
2379 2389 cachedelta = None
2380 2390 rawtext = None
2381 2391 if populatecachedelta:
2382 2392 dp = self.deltaparent(rev)
2383 2393 if dp != nullrev:
2384 2394 cachedelta = (dp, bytes(self._chunk(rev)))
2385 2395
2386 2396 if not cachedelta:
2387 2397 rawtext = self.revision(rev, raw=True)
2388 2398
2390 2400 if deltareuse == self.DELTAREUSEFULLADD:
2391 2401 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2392 2402 cachedelta=cachedelta,
2393 2403 node=node, flags=flags,
2394 2404 deltacomputer=deltacomputer)
2395 2405 else:
2396 2406 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2397 2407 checkambig=False)
2398 2408 dfh = None
2399 2409 if not destrevlog._inline:
2400 2410 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2401 2411 try:
2402 2412 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2403 2413 p2, flags, cachedelta, ifh, dfh,
2404 2414 deltacomputer=deltacomputer)
2405 2415 finally:
2406 2416 if dfh:
2407 2417 dfh.close()
2408 2418 ifh.close()
2409 2419
2410 2420 if addrevisioncb:
2411 2421 addrevisioncb(self, rev, node)
2412 2422 finally:
2413 2423 destrevlog._lazydeltabase = oldlazydeltabase
2414 2424 destrevlog._deltabothparents = oldamd
2415 2425
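    # Illustrative use of ``clone`` (hypothetical file name), mirroring how
    # ``censorrevision`` below builds a sibling revlog, e.g. to recompute
    # every delta during an upgrade:
    #
    #   dest = revlog(rl.opener, rl.indexfile + b'.tmp')
    #   with repo.transaction(b'upgrade') as tr:
    #       rl.clone(tr, dest, deltareuse=rl.DELTAREUSENEVER)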
2416 2426 def censorrevision(self, tr, censornode, tombstone=b''):
2417 2427 if (self.version & 0xFFFF) == REVLOGV0:
2418 2428 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2419 2429 self.version)
2420 2430
2421 2431 censorrev = self.rev(censornode)
2422 2432 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2423 2433
2424 2434 if len(tombstone) > self.rawsize(censorrev):
2425 2435 raise error.Abort(_('censor tombstone must be no longer than '
2426 2436 'censored data'))
2427 2437
2428 2438 # Rewriting the revlog in place is hard. Our strategy for censoring is
2429 2439 # to create a new revlog, copy all revisions to it, then replace the
2430 2440 # revlogs on transaction close.
2431 2441
2432 2442 newindexfile = self.indexfile + b'.tmpcensored'
2433 2443 newdatafile = self.datafile + b'.tmpcensored'
2434 2444
2435 2445 # This is a bit dangerous. We could easily have a mismatch of state.
2436 2446 newrl = revlog(self.opener, newindexfile, newdatafile,
2437 2447 censorable=True)
2438 2448 newrl.version = self.version
2439 2449 newrl._generaldelta = self._generaldelta
2440 2450 newrl._io = self._io
2441 2451
2442 2452 for rev in self.revs():
2443 2453 node = self.node(rev)
2444 2454 p1, p2 = self.parents(node)
2445 2455
2446 2456 if rev == censorrev:
2447 2457 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2448 2458 p1, p2, censornode, REVIDX_ISCENSORED)
2449 2459
2450 2460 if newrl.deltaparent(rev) != nullrev:
2451 2461 raise error.Abort(_('censored revision stored as delta; '
2452 2462 'cannot censor'),
2453 2463 hint=_('censoring of revlogs is not '
2454 2464 'fully implemented; please report '
2455 2465 'this bug'))
2456 2466 continue
2457 2467
2458 2468 if self.iscensored(rev):
2459 2469 if self.deltaparent(rev) != nullrev:
2460 2470 raise error.Abort(_('cannot censor due to censored '
2461 2471 'revision having delta stored'))
2462 2472 rawtext = self._chunk(rev)
2463 2473 else:
2464 2474 rawtext = self.revision(rev, raw=True)
2465 2475
2466 2476 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2467 2477 self.flags(rev))
2468 2478
2469 2479 tr.addbackup(self.indexfile, location='store')
2470 2480 if not self._inline:
2471 2481 tr.addbackup(self.datafile, location='store')
2472 2482
2473 2483 self.opener.rename(newrl.indexfile, self.indexfile)
2474 2484 if not self._inline:
2475 2485 self.opener.rename(newrl.datafile, self.datafile)
2476 2486
2477 2487 self.clearcaches()
2478 2488 self._loadindex(self.version, None)
2479 2489
2480 2490 def verifyintegrity(self, state):
2481 2491 """Verifies the integrity of the revlog.
2482 2492
2483 2493 Yields ``revlogproblem`` instances describing problems that are
2484 2494 found.
2485 2495 """
2486 2496 dd, di = self.checksize()
2487 2497 if dd:
2488 2498 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2489 2499 if di:
2490 2500 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2491 2501
2492 2502 version = self.version & 0xFFFF
2493 2503
2494 2504 # The verifier tells us what version revlog we should be.
2495 2505 if version != state['expectedversion']:
2496 2506 yield revlogproblem(
2497 2507 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2498 2508 (self.indexfile, version, state['expectedversion']))
2499 2509
2500 2510 state['skipread'] = set()
2501 2511
2502 2512 for rev in self:
2503 2513 node = self.node(rev)
2504 2514
2505 2515 # Verify contents. 4 cases to care about:
2506 2516 #
2507 2517 # common: the most common case
2508 2518 # rename: with a rename
2509 2519 # meta: file content starts with b'\1\n', the metadata
2510 2520 # header defined in filelog.py, but without a rename
2511 2521 # ext: content stored externally
2512 2522 #
2513 2523 # More formally, their differences are shown below:
2514 2524 #
2515 2525 # | common | rename | meta | ext
2516 2526 # -------------------------------------------------------
2517 2527 # flags() | 0 | 0 | 0 | not 0
2518 2528 # renamed() | False | True | False | ?
2519 2529 # rawtext[0:2]=='\1\n'| False | True | True | ?
2520 2530 #
2521 2531 # "rawtext" means the raw text stored in revlog data, which
2522 2532 # could be retrieved by "revision(rev, raw=True)". "text"
2523 2533 # mentioned below is "revision(rev, raw=False)".
2524 2534 #
2525 2535 # There are 3 different lengths stored physically:
2526 2536 # 1. L1: rawsize, stored in revlog index
2527 2537 # 2. L2: len(rawtext), stored in revlog data
2528 2538 # 3. L3: len(text), stored in revlog data if flags==0, or
2529 2539 # possibly somewhere else if flags!=0
2530 2540 #
2531 2541 # L1 should be equal to L2. L3 could be different from them.
2532 2542 # "text" may or may not affect commit hash depending on flag
2533 2543 # processors (see revlog.addflagprocessor).
2534 2544 #
2535 2545 # | common | rename | meta | ext
2536 2546 # -------------------------------------------------
2537 2547 # rawsize() | L1 | L1 | L1 | L1
2538 2548 # size() | L1 | L2-LM | L1(*) | L1 (?)
2539 2549 # len(rawtext) | L2 | L2 | L2 | L2
2540 2550 # len(text) | L2 | L2 | L2 | L3
2541 2551 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2542 2552 #
2543 2553 # LM: length of metadata, depending on rawtext
2544 2554 # (*): not ideal, see comment in filelog.size
2545 2555 # (?): could be "- len(meta)" if the resolved content has
2546 2556 # rename metadata
2547 2557 #
2548 2558 # Checks needed to be done:
2549 2559 # 1. length check: L1 == L2, in all cases.
2550 2560 # 2. hash check: depending on flag processor, we may need to
2551 2561 # use either "text" (external), or "rawtext" (in revlog).
2552 2562
2553 2563 try:
2554 2564 skipflags = state.get('skipflags', 0)
2555 2565 if skipflags:
2556 2566 skipflags &= self.flags(rev)
2557 2567
2558 2568 if skipflags:
2559 2569 state['skipread'].add(node)
2560 2570 else:
2561 2571 # Side-effect: read content and verify hash.
2562 2572 self.revision(node)
2563 2573
2564 2574 l1 = self.rawsize(rev)
2565 2575 l2 = len(self.revision(node, raw=True))
2566 2576
2567 2577 if l1 != l2:
2568 2578 yield revlogproblem(
2569 2579 error=_('unpacked size is %d, %d expected') % (l2, l1),
2570 2580 node=node)
2571 2581
2572 2582 except error.CensoredNodeError:
2573 2583 if state['erroroncensored']:
2574 2584 yield revlogproblem(error=_('censored file data'),
2575 2585 node=node)
2576 2586 state['skipread'].add(node)
2577 2587 except Exception as e:
2578 2588 yield revlogproblem(
2579 2589 error=_('unpacking %s: %s') % (short(node),
2580 2590 stringutil.forcebytestr(e)),
2581 2591 node=node)
2582 2592 state['skipread'].add(node)
2583 2593
2584 2594 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2585 2595 revisionscount=False, trackedsize=False,
2586 2596 storedsize=False):
2587 2597 d = {}
2588 2598
2589 2599 if exclusivefiles:
2590 2600 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2591 2601 if not self._inline:
2592 2602 d['exclusivefiles'].append((self.opener, self.datafile))
2593 2603
2594 2604 if sharedfiles:
2595 2605 d['sharedfiles'] = []
2596 2606
2597 2607 if revisionscount:
2598 2608 d['revisionscount'] = len(self)
2599 2609
2600 2610 if trackedsize:
2601 2611 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2602 2612
2603 2613 if storedsize:
2604 2614 d['storedsize'] = sum(self.opener.stat(path).st_size
2605 2615 for path in self.files())
2606 2616
2607 2617 return d