flagutil: move REVIDX_KNOWN_FLAGS source of truth in flagutil (API)...
marmoute
r42956:92ac6b16 default
@@ -1,2700 +1,2698 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 REVIDX_KNOWN_FLAGS,
57 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 57 )
59 58 from .thirdparty import (
60 59 attr,
61 60 )
62 61 from . import (
63 62 ancestor,
64 63 dagop,
65 64 error,
66 65 mdiff,
67 66 policy,
68 67 pycompat,
69 68 repository,
70 69 templatefilters,
71 70 util,
72 71 )
73 72 from .revlogutils import (
74 73 deltas as deltautil,
75 74 flagutil,
76 75 )
77 76 from .utils import (
78 77 interfaceutil,
79 78 storageutil,
80 79 stringutil,
81 80 )
82 81
83 82 # blanked usage of all the names to prevent pyflakes constraints
84 83 # We need these names available in the module for extensions.
85 84 REVLOGV0
86 85 REVLOGV1
87 86 REVLOGV2
88 87 FLAG_INLINE_DATA
89 88 FLAG_GENERALDELTA
90 89 REVLOG_DEFAULT_FLAGS
91 90 REVLOG_DEFAULT_FORMAT
92 91 REVLOG_DEFAULT_VERSION
93 92 REVLOGV1_FLAGS
94 93 REVLOGV2_FLAGS
95 94 REVIDX_ISCENSORED
96 95 REVIDX_ELLIPSIS
97 96 REVIDX_EXTSTORED
98 97 REVIDX_DEFAULT_FLAGS
99 98 REVIDX_FLAGS_ORDER
100 REVIDX_KNOWN_FLAGS
101 99 REVIDX_RAWTEXT_CHANGING_FLAGS
102 100
103 101 parsers = policy.importmod(r'parsers')
104 102 rustancestor = policy.importrust(r'ancestor')
105 103 rustdagop = policy.importrust(r'dagop')
106 104
107 105 # Aliased for performance.
108 106 _zlibdecompress = zlib.decompress
109 107
110 108 # max size of revlog with inline data
111 109 _maxinline = 131072
112 110 _chunksize = 1048576
113 111
114 112 # Flag processors for REVIDX_ELLIPSIS.
115 113 def ellipsisreadprocessor(rl, text):
116 114 return text, False
117 115
118 116 def ellipsiswriteprocessor(rl, text):
119 117 return text, False
120 118
121 119 def ellipsisrawprocessor(rl, text):
122 120 return False
123 121
124 122 ellipsisprocessor = (
125 123 ellipsisreadprocessor,
126 124 ellipsiswriteprocessor,
127 125 ellipsisrawprocessor,
128 126 )
129 127
130 128 def addflagprocessor(flag, processor):
131 129 """Register a flag processor on a revision data flag.
132 130
133 131 Invariant:
134 132 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
135 133 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
136 134 - Only one flag processor can be registered on a specific flag.
137 135 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
138 136 following signatures:
139 137 - (read) f(self, rawtext) -> text, bool
140 138 - (write) f(self, text) -> rawtext, bool
141 139 - (raw) f(self, rawtext) -> bool
142 140 "text" is presented to the user. "rawtext" is stored in revlog data, not
143 141 directly visible to the user.
144 142 The boolean returned by these transforms is used to determine whether
145 143 the returned text can be used for hash integrity checking. For example,
146 144 if "write" returns False, then "text" is used to generate hash. If
147 145 "write" returns True, that basically means "rawtext" returned by "write"
148 146 should be used to generate hash. Usually, "write" and "read" return
149 147 different booleans. And "raw" returns the same boolean as "write".
150 148
151 149 Note: The 'raw' transform is used for changegroup generation and in some
152 150 debug commands. In this case the transform only indicates whether the
153 151 contents can be used for hash integrity checks.
154 152 """
155 153 _insertflagprocessor(flag, processor, flagutil.flagprocessors)
156 154
157 155 def _insertflagprocessor(flag, processor, flagprocessors):
158 if not flag & REVIDX_KNOWN_FLAGS:
156 if not flag & flagutil.REVIDX_KNOWN_FLAGS:
159 157 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
160 158 raise error.ProgrammingError(msg)
161 159 if flag not in REVIDX_FLAGS_ORDER:
162 160 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
163 161 raise error.ProgrammingError(msg)
164 162 if flag in flagprocessors:
165 163 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
166 164 raise error.Abort(msg)
167 165 flagprocessors[flag] = processor
168 166
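
For illustration only, here is a minimal processor following the (read, write, raw) contract documented above. The base64 transform and the reuse of REVIDX_EXTSTORED as the registration flag are assumptions for this sketch, not how any real extension stores data:

    import base64

    def b64read(self, rawtext):
        # stored form -> user-visible form; False means the node hash is
        # computed from the returned "text", not from the stored rawtext
        return base64.b64decode(rawtext), False

    def b64write(self, text):
        # user-visible form -> stored form; False means hash from "text"
        return base64.b64encode(text), False

    def b64raw(self, rawtext):
        # rawtext alone cannot be checked against the node hash
        return False

    # registration sketch: addflagprocessor(REVIDX_EXTSTORED,
    #                                       (b64read, b64write, b64raw))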
169 167 def getoffset(q):
170 168 return int(q >> 16)
171 169
172 170 def gettype(q):
173 171 return int(q & 0xFFFF)
174 172
175 173 def offset_type(offset, type):
176 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
174 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
177 175 raise ValueError('unknown revlog index flags')
178 176 return int(int(offset) << 16 | type)
179 177
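
The two helpers above split a single 64-bit index field: the byte offset lives in the high 48 bits and the flags in the low 16. A standalone check of the packing (flag value chosen arbitrarily for the sketch):

    packed = int(1048576) << 16 | 0x2000     # offset 1 MiB, one flag bit
    assert packed >> 16 == 1048576           # what getoffset() recovers
    assert packed & 0xFFFF == 0x2000         # what gettype() recovers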
180 178 @attr.s(slots=True, frozen=True)
181 179 class _revisioninfo(object):
182 180 """Information about a revision that allows building its fulltext
183 181 node: expected hash of the revision
184 182 p1, p2: parent revs of the revision
185 183 btext: built text cache consisting of a one-element list
186 184 cachedelta: (baserev, uncompressed_delta) or None
187 185 flags: flags associated to the revision storage
188 186
189 187 One of btext[0] or cachedelta must be set.
190 188 """
191 189 node = attr.ib()
192 190 p1 = attr.ib()
193 191 p2 = attr.ib()
194 192 btext = attr.ib()
195 193 textlen = attr.ib()
196 194 cachedelta = attr.ib()
197 195 flags = attr.ib()
198 196
199 197 @interfaceutil.implementer(repository.irevisiondelta)
200 198 @attr.s(slots=True)
201 199 class revlogrevisiondelta(object):
202 200 node = attr.ib()
203 201 p1node = attr.ib()
204 202 p2node = attr.ib()
205 203 basenode = attr.ib()
206 204 flags = attr.ib()
207 205 baserevisionsize = attr.ib()
208 206 revision = attr.ib()
209 207 delta = attr.ib()
210 208 linknode = attr.ib(default=None)
211 209
212 210 @interfaceutil.implementer(repository.iverifyproblem)
213 211 @attr.s(frozen=True)
214 212 class revlogproblem(object):
215 213 warning = attr.ib(default=None)
216 214 error = attr.ib(default=None)
217 215 node = attr.ib(default=None)
218 216
219 217 # index v0:
220 218 # 4 bytes: offset
221 219 # 4 bytes: compressed length
222 220 # 4 bytes: base rev
223 221 # 4 bytes: link rev
224 222 # 20 bytes: parent 1 nodeid
225 223 # 20 bytes: parent 2 nodeid
226 224 # 20 bytes: nodeid
227 225 indexformatv0 = struct.Struct(">4l20s20s20s")
228 226 indexformatv0_pack = indexformatv0.pack
229 227 indexformatv0_unpack = indexformatv0.unpack
230 228
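
A quick standalone sanity check of the v0 record size implied by the layout above:

    import struct

    v0 = struct.Struct(">4l20s20s20s")
    # 4 x 4-byte ints + 3 x 20-byte nodeids = 16 + 60 = 76 bytes per entry
    assert v0.size == 76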
231 229 class revlogoldindex(list):
232 230 def __getitem__(self, i):
233 231 if i == -1:
234 232 return (0, 0, 0, -1, -1, -1, -1, nullid)
235 233 return list.__getitem__(self, i)
236 234
237 235 class revlogoldio(object):
238 236 def __init__(self):
239 237 self.size = indexformatv0.size
240 238
241 239 def parseindex(self, data, inline):
242 240 s = self.size
243 241 index = []
244 242 nodemap = {nullid: nullrev}
245 243 n = off = 0
246 244 l = len(data)
247 245 while off + s <= l:
248 246 cur = data[off:off + s]
249 247 off += s
250 248 e = indexformatv0_unpack(cur)
251 249 # transform to revlogv1 format
252 250 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
253 251 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
254 252 index.append(e2)
255 253 nodemap[e[6]] = n
256 254 n += 1
257 255
258 256 return revlogoldindex(index), nodemap, None
259 257
260 258 def packentry(self, entry, node, version, rev):
261 259 if gettype(entry[0]):
262 260 raise error.RevlogError(_('index entry flags need revlog '
263 261 'version 1'))
264 262 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
265 263 node(entry[5]), node(entry[6]), entry[7])
266 264 return indexformatv0_pack(*e2)
267 265
268 266 # index ng:
269 267 # 6 bytes: offset
270 268 # 2 bytes: flags
271 269 # 4 bytes: compressed length
272 270 # 4 bytes: uncompressed length
273 271 # 4 bytes: base rev
274 272 # 4 bytes: link rev
275 273 # 4 bytes: parent 1 rev
276 274 # 4 bytes: parent 2 rev
277 275 # 32 bytes: nodeid
278 276 indexformatng = struct.Struct(">Qiiiiii20s12x")
279 277 indexformatng_pack = indexformatng.pack
280 278 versionformat = struct.Struct(">I")
281 279 versionformat_pack = versionformat.pack
282 280 versionformat_unpack = versionformat.unpack
283 281
284 282 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
285 283 # signed integer)
286 284 _maxentrysize = 0x7fffffff
287 285
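
Likewise for the "ng" (v1) format: 8 bytes of offset-plus-flags, six 4-byte ints, a 20-byte nodeid and 12 bytes of padding give a fixed 64-byte record; packentry() below overwrites the first 4 bytes of entry 0 with the version header:

    import struct

    ng = struct.Struct(">Qiiiiii20s12x")
    # 8 (offset|flags) + 6*4 (ints) + 20 (nodeid) + 12 (padding) = 64 bytes
    assert ng.size == 64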
288 286 class revlogio(object):
289 287 def __init__(self):
290 288 self.size = indexformatng.size
291 289
292 290 def parseindex(self, data, inline):
293 291 # call the C implementation to parse the index data
294 292 index, cache = parsers.parse_index2(data, inline)
295 293 return index, getattr(index, 'nodemap', None), cache
296 294
297 295 def packentry(self, entry, node, version, rev):
298 296 p = indexformatng_pack(*entry)
299 297 if rev == 0:
300 298 p = versionformat_pack(version) + p[4:]
301 299 return p
302 300
303 301 class revlog(object):
304 302 """
305 303 the underlying revision storage object
306 304
307 305 A revlog consists of two parts, an index and the revision data.
308 306
309 307 The index is a file with a fixed record size containing
310 308 information on each revision, including its nodeid (hash), the
311 309 nodeids of its parents, the position and offset of its data within
312 310 the data file, and the revision it's based on. Finally, each entry
313 311 contains a linkrev entry that can serve as a pointer to external
314 312 data.
315 313
316 314 The revision data itself is a linear collection of data chunks.
317 315 Each chunk represents a revision and is usually represented as a
318 316 delta against the previous chunk. To bound lookup time, runs of
319 317 deltas are limited to about 2 times the length of the original
320 318 version data. This makes retrieval of a version proportional to
321 319 its size, or O(1) relative to the number of revisions.
322 320
323 321 Both pieces of the revlog are written to in an append-only
324 322 fashion, which means we never need to rewrite a file to insert or
325 323 remove data, and can use some simple techniques to avoid the need
326 324 for locking while reading.
327 325
328 326 If checkambig, indexfile is opened with checkambig=True at
329 327 writing, to avoid file stat ambiguity.
330 328
331 329 If mmaplargeindex is True, and an mmapindexthreshold is set, the
332 330 index will be mmapped rather than read if it is larger than the
333 331 configured threshold.
334 332
335 333 If censorable is True, the revlog can have censored revisions.
336 334
337 335 If `upperboundcomp` is not None, this is the expected maximal gain from
338 336 compression for the data content.
339 337 """
340 338 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
341 339 mmaplargeindex=False, censorable=False,
342 340 upperboundcomp=None):
343 341 """
344 342 create a revlog object
345 343
346 344 opener is a function that abstracts the file opening operation
347 345 and can be used to implement COW semantics or the like.
348 346
349 347 """
350 348 self.upperboundcomp = upperboundcomp
351 349 self.indexfile = indexfile
352 350 self.datafile = datafile or (indexfile[:-2] + ".d")
353 351 self.opener = opener
354 352 # When True, indexfile is opened with checkambig=True at writing, to
355 353 # avoid file stat ambiguity.
356 354 self._checkambig = checkambig
357 355 self._mmaplargeindex = mmaplargeindex
358 356 self._censorable = censorable
359 357 # 3-tuple of (node, rev, text) for a raw revision.
360 358 self._revisioncache = None
361 359 # Maps rev to chain base rev.
362 360 self._chainbasecache = util.lrucachedict(100)
363 361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
364 362 self._chunkcache = (0, '')
365 363 # How much data to read and cache into the raw revlog data cache.
366 364 self._chunkcachesize = 65536
367 365 self._maxchainlen = None
368 366 self._deltabothparents = True
369 367 self.index = []
370 368 # Mapping of partial identifiers to full nodes.
371 369 self._pcache = {}
372 370 # Mapping of revision integer to full node.
373 371 self._nodecache = {nullid: nullrev}
374 372 self._nodepos = None
375 373 self._compengine = 'zlib'
376 374 self._compengineopts = {}
377 375 self._maxdeltachainspan = -1
378 376 self._withsparseread = False
379 377 self._sparserevlog = False
380 378 self._srdensitythreshold = 0.50
381 379 self._srmingapsize = 262144
382 380
383 381 # Make copy of flag processors so each revlog instance can support
384 382 # custom flags.
385 383 self._flagprocessors = dict(flagutil.flagprocessors)
386 384
387 385 # 2-tuple of file handles being used for active writing.
388 386 self._writinghandles = None
389 387
390 388 self._loadindex()
391 389
392 390 def _loadindex(self):
393 391 mmapindexthreshold = None
394 392 opts = getattr(self.opener, 'options', {}) or {}
395 393
396 394 if 'revlogv2' in opts:
397 395 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
398 396 elif 'revlogv1' in opts:
399 397 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
400 398 if 'generaldelta' in opts:
401 399 newversionflags |= FLAG_GENERALDELTA
402 400 elif getattr(self.opener, 'options', None) is not None:
403 401 # If options provided but no 'revlog*' found, the repository
404 402 # would have no 'requires' file in it, which means we have to
405 403 # stick to the old format.
406 404 newversionflags = REVLOGV0
407 405 else:
408 406 newversionflags = REVLOG_DEFAULT_VERSION
409 407
410 408 if 'chunkcachesize' in opts:
411 409 self._chunkcachesize = opts['chunkcachesize']
412 410 if 'maxchainlen' in opts:
413 411 self._maxchainlen = opts['maxchainlen']
414 412 if 'deltabothparents' in opts:
415 413 self._deltabothparents = opts['deltabothparents']
416 414 self._lazydelta = bool(opts.get('lazydelta', True))
417 415 self._lazydeltabase = False
418 416 if self._lazydelta:
419 417 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
420 418 if 'compengine' in opts:
421 419 self._compengine = opts['compengine']
422 420 if 'zlib.level' in opts:
423 421 self._compengineopts['zlib.level'] = opts['zlib.level']
424 422 if 'zstd.level' in opts:
425 423 self._compengineopts['zstd.level'] = opts['zstd.level']
426 424 if 'maxdeltachainspan' in opts:
427 425 self._maxdeltachainspan = opts['maxdeltachainspan']
428 426 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
429 427 mmapindexthreshold = opts['mmapindexthreshold']
430 428 self._sparserevlog = bool(opts.get('sparse-revlog', False))
431 429 withsparseread = bool(opts.get('with-sparse-read', False))
432 430 # sparse-revlog forces sparse-read
433 431 self._withsparseread = self._sparserevlog or withsparseread
434 432 if 'sparse-read-density-threshold' in opts:
435 433 self._srdensitythreshold = opts['sparse-read-density-threshold']
436 434 if 'sparse-read-min-gap-size' in opts:
437 435 self._srmingapsize = opts['sparse-read-min-gap-size']
438 436 if opts.get('enableellipsis'):
439 437 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
440 438
441 439 # revlog v0 doesn't have flag processors
442 440 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
443 441 _insertflagprocessor(flag, processor, self._flagprocessors)
444 442
445 443 if self._chunkcachesize <= 0:
446 444 raise error.RevlogError(_('revlog chunk cache size %r is not '
447 445 'greater than 0') % self._chunkcachesize)
448 446 elif self._chunkcachesize & (self._chunkcachesize - 1):
449 447 raise error.RevlogError(_('revlog chunk cache size %r is not a '
450 448 'power of 2') % self._chunkcachesize)
451 449
452 450 indexdata = ''
453 451 self._initempty = True
454 452 try:
455 453 with self._indexfp() as f:
456 454 if (mmapindexthreshold is not None and
457 455 self.opener.fstat(f).st_size >= mmapindexthreshold):
458 456 # TODO: should .close() to release resources without
459 457 # relying on Python GC
460 458 indexdata = util.buffer(util.mmapread(f))
461 459 else:
462 460 indexdata = f.read()
463 461 if len(indexdata) > 0:
464 462 versionflags = versionformat_unpack(indexdata[:4])[0]
465 463 self._initempty = False
466 464 else:
467 465 versionflags = newversionflags
468 466 except IOError as inst:
469 467 if inst.errno != errno.ENOENT:
470 468 raise
471 469
472 470 versionflags = newversionflags
473 471
474 472 self.version = versionflags
475 473
476 474 flags = versionflags & ~0xFFFF
477 475 fmt = versionflags & 0xFFFF
478 476
479 477 if fmt == REVLOGV0:
480 478 if flags:
481 479 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
482 480 'revlog %s') %
483 481 (flags >> 16, fmt, self.indexfile))
484 482
485 483 self._inline = False
486 484 self._generaldelta = False
487 485
488 486 elif fmt == REVLOGV1:
489 487 if flags & ~REVLOGV1_FLAGS:
490 488 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
491 489 'revlog %s') %
492 490 (flags >> 16, fmt, self.indexfile))
493 491
494 492 self._inline = versionflags & FLAG_INLINE_DATA
495 493 self._generaldelta = versionflags & FLAG_GENERALDELTA
496 494
497 495 elif fmt == REVLOGV2:
498 496 if flags & ~REVLOGV2_FLAGS:
499 497 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
500 498 'revlog %s') %
501 499 (flags >> 16, fmt, self.indexfile))
502 500
503 501 self._inline = versionflags & FLAG_INLINE_DATA
504 502 # generaldelta implied by version 2 revlogs.
505 503 self._generaldelta = True
506 504
507 505 else:
508 506 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
509 507 (fmt, self.indexfile))
510 508 # sparse-revlog can't be on without general-delta (issue6056)
511 509 if not self._generaldelta:
512 510 self._sparserevlog = False
513 511
514 512 self._storedeltachains = True
515 513
516 514 self._io = revlogio()
517 515 if self.version == REVLOGV0:
518 516 self._io = revlogoldio()
519 517 try:
520 518 d = self._io.parseindex(indexdata, self._inline)
521 519 except (ValueError, IndexError):
522 520 raise error.RevlogError(_("index %s is corrupted") %
523 521 self.indexfile)
524 522 self.index, nodemap, self._chunkcache = d
525 523 if nodemap is not None:
526 524 self.nodemap = self._nodecache = nodemap
527 525 if not self._chunkcache:
528 526 self._chunkclear()
529 527 # revnum -> (chain-length, sum-delta-length)
530 528 self._chaininfocache = {}
531 529 # revlog header -> revlog compressor
532 530 self._decompressors = {}
533 531
534 532 @util.propertycache
535 533 def _compressor(self):
536 534 engine = util.compengines[self._compengine]
537 535 return engine.revlogcompressor(self._compengineopts)
538 536
539 537 def _indexfp(self, mode='r'):
540 538 """file object for the revlog's index file"""
541 539 args = {r'mode': mode}
542 540 if mode != 'r':
543 541 args[r'checkambig'] = self._checkambig
544 542 if mode == 'w':
545 543 args[r'atomictemp'] = True
546 544 return self.opener(self.indexfile, **args)
547 545
548 546 def _datafp(self, mode='r'):
549 547 """file object for the revlog's data file"""
550 548 return self.opener(self.datafile, mode=mode)
551 549
552 550 @contextlib.contextmanager
553 551 def _datareadfp(self, existingfp=None):
554 552 """file object suitable to read data"""
555 553 # Use explicit file handle, if given.
556 554 if existingfp is not None:
557 555 yield existingfp
558 556
559 557 # Use a file handle being actively used for writes, if available.
560 558 # There is some danger to doing this because reads will seek the
561 559 # file. However, _writeentry() performs a SEEK_END before all writes,
562 560 # so we should be safe.
563 561 elif self._writinghandles:
564 562 if self._inline:
565 563 yield self._writinghandles[0]
566 564 else:
567 565 yield self._writinghandles[1]
568 566
569 567 # Otherwise open a new file handle.
570 568 else:
571 569 if self._inline:
572 570 func = self._indexfp
573 571 else:
574 572 func = self._datafp
575 573 with func() as fp:
576 574 yield fp
577 575
578 576 def tip(self):
579 577 return self.node(len(self.index) - 1)
580 578 def __contains__(self, rev):
581 579 return 0 <= rev < len(self)
582 580 def __len__(self):
583 581 return len(self.index)
584 582 def __iter__(self):
585 583 return iter(pycompat.xrange(len(self)))
586 584 def revs(self, start=0, stop=None):
587 585 """iterate over all rev in this revlog (from start to stop)"""
588 586 return storageutil.iterrevs(len(self), start=start, stop=stop)
589 587
590 588 @util.propertycache
591 589 def nodemap(self):
592 590 if self.index:
593 591 # populate mapping down to the initial node
594 592 node0 = self.index[0][7] # get around changelog filtering
595 593 self.rev(node0)
596 594 return self._nodecache
597 595
598 596 def hasnode(self, node):
599 597 try:
600 598 self.rev(node)
601 599 return True
602 600 except KeyError:
603 601 return False
604 602
605 603 def candelta(self, baserev, rev):
606 604 """whether two revisions (baserev, rev) can be delta-ed or not"""
607 605 # Disable delta if either rev requires a content-changing flag
608 606 # processor (ex. LFS). This is because such flag processor can alter
609 607 # the rawtext content that the delta will be based on, and two clients
610 608 # could have the same revlog node with different flags (i.e. different
611 609 # rawtext contents) and the delta could be incompatible.
612 610 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
613 611 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
614 612 return False
615 613 return True
616 614
617 615 def clearcaches(self):
618 616 self._revisioncache = None
619 617 self._chainbasecache.clear()
620 618 self._chunkcache = (0, '')
621 619 self._pcache = {}
622 620
623 621 try:
624 622 # If we are using the native C version, you are in a fun case
625 623 # where self.index, self.nodemap and self._nodecache are the same
626 624 # object.
627 625 self._nodecache.clearcaches()
628 626 except AttributeError:
629 627 self._nodecache = {nullid: nullrev}
630 628 self._nodepos = None
631 629
632 630 def rev(self, node):
633 631 try:
634 632 return self._nodecache[node]
635 633 except TypeError:
636 634 raise
637 635 except error.RevlogError:
638 636 # parsers.c radix tree lookup failed
639 637 if node == wdirid or node in wdirfilenodeids:
640 638 raise error.WdirUnsupported
641 639 raise error.LookupError(node, self.indexfile, _('no node'))
642 640 except KeyError:
643 641 # pure python cache lookup failed
644 642 n = self._nodecache
645 643 i = self.index
646 644 p = self._nodepos
647 645 if p is None:
648 646 p = len(i) - 1
649 647 else:
650 648 assert p < len(i)
651 649 for r in pycompat.xrange(p, -1, -1):
652 650 v = i[r][7]
653 651 n[v] = r
654 652 if v == node:
655 653 self._nodepos = r - 1
656 654 return r
657 655 if node == wdirid or node in wdirfilenodeids:
658 656 raise error.WdirUnsupported
659 657 raise error.LookupError(node, self.indexfile, _('no node'))
660 658
661 659 # Accessors for index entries.
662 660
663 661 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
664 662 # are flags.
665 663 def start(self, rev):
666 664 return int(self.index[rev][0] >> 16)
667 665
668 666 def flags(self, rev):
669 667 return self.index[rev][0] & 0xFFFF
670 668
671 669 def length(self, rev):
672 670 return self.index[rev][1]
673 671
674 672 def rawsize(self, rev):
675 673 """return the length of the uncompressed text for a given revision"""
676 674 l = self.index[rev][2]
677 675 if l >= 0:
678 676 return l
679 677
680 678 t = self.revision(rev, raw=True)
681 679 return len(t)
682 680
683 681 def size(self, rev):
684 682 """length of non-raw text (processed by a "read" flag processor)"""
685 683 # fast path: if no "read" flag processor could change the content,
686 684 # size is rawsize. note: ELLIPSIS is known to not change the content.
687 685 flags = self.flags(rev)
688 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
686 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
689 687 return self.rawsize(rev)
690 688
691 689 return len(self.revision(rev, raw=False))
692 690
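
The fast path above works by masking ELLIPSIS out of the known-flags set; with toy stand-in values for the constants (assumed for this sketch only):

    KNOWN = 0b0111               # stand-in for flagutil.REVIDX_KNOWN_FLAGS
    ELLIPSIS = 0b0100            # stand-in for REVIDX_ELLIPSIS
    mask = KNOWN ^ ELLIPSIS      # every known flag except ELLIPSIS
    assert 0b0100 & mask == 0    # only ELLIPSIS set: rawsize() fast path
    assert 0b0001 & mask != 0    # another flag set: run the "read" processor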
693 691 def chainbase(self, rev):
694 692 base = self._chainbasecache.get(rev)
695 693 if base is not None:
696 694 return base
697 695
698 696 index = self.index
699 697 iterrev = rev
700 698 base = index[iterrev][3]
701 699 while base != iterrev:
702 700 iterrev = base
703 701 base = index[iterrev][3]
704 702
705 703 self._chainbasecache[rev] = base
706 704 return base
707 705
708 706 def linkrev(self, rev):
709 707 return self.index[rev][4]
710 708
711 709 def parentrevs(self, rev):
712 710 try:
713 711 entry = self.index[rev]
714 712 except IndexError:
715 713 if rev == wdirrev:
716 714 raise error.WdirUnsupported
717 715 raise
718 716
719 717 return entry[5], entry[6]
720 718
721 719 # fast parentrevs(rev) where rev isn't filtered
722 720 _uncheckedparentrevs = parentrevs
723 721
724 722 def node(self, rev):
725 723 try:
726 724 return self.index[rev][7]
727 725 except IndexError:
728 726 if rev == wdirrev:
729 727 raise error.WdirUnsupported
730 728 raise
731 729
732 730 # Derived from index values.
733 731
734 732 def end(self, rev):
735 733 return self.start(rev) + self.length(rev)
736 734
737 735 def parents(self, node):
738 736 i = self.index
739 737 d = i[self.rev(node)]
740 738 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
741 739
742 740 def chainlen(self, rev):
743 741 return self._chaininfo(rev)[0]
744 742
745 743 def _chaininfo(self, rev):
746 744 chaininfocache = self._chaininfocache
747 745 if rev in chaininfocache:
748 746 return chaininfocache[rev]
749 747 index = self.index
750 748 generaldelta = self._generaldelta
751 749 iterrev = rev
752 750 e = index[iterrev]
753 751 clen = 0
754 752 compresseddeltalen = 0
755 753 while iterrev != e[3]:
756 754 clen += 1
757 755 compresseddeltalen += e[1]
758 756 if generaldelta:
759 757 iterrev = e[3]
760 758 else:
761 759 iterrev -= 1
762 760 if iterrev in chaininfocache:
763 761 t = chaininfocache[iterrev]
764 762 clen += t[0]
765 763 compresseddeltalen += t[1]
766 764 break
767 765 e = index[iterrev]
768 766 else:
769 767 # Add text length of base since decompressing that also takes
770 768 # work. For cache hits the length is already included.
771 769 compresseddeltalen += e[1]
772 770 r = (clen, compresseddeltalen)
773 771 chaininfocache[rev] = r
774 772 return r
775 773
776 774 def _deltachain(self, rev, stoprev=None):
777 775 """Obtain the delta chain for a revision.
778 776
779 777 ``stoprev`` specifies a revision to stop at. If not specified, we
780 778 stop at the base of the chain.
781 779
782 780 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
783 781 revs in ascending order and ``stopped`` is a bool indicating whether
784 782 ``stoprev`` was hit.
785 783 """
786 784 # Try C implementation.
787 785 try:
788 786 return self.index.deltachain(rev, stoprev, self._generaldelta)
789 787 except AttributeError:
790 788 pass
791 789
792 790 chain = []
793 791
794 792 # Alias to prevent attribute lookup in tight loop.
795 793 index = self.index
796 794 generaldelta = self._generaldelta
797 795
798 796 iterrev = rev
799 797 e = index[iterrev]
800 798 while iterrev != e[3] and iterrev != stoprev:
801 799 chain.append(iterrev)
802 800 if generaldelta:
803 801 iterrev = e[3]
804 802 else:
805 803 iterrev -= 1
806 804 e = index[iterrev]
807 805
808 806 if iterrev == stoprev:
809 807 stopped = True
810 808 else:
811 809 chain.append(iterrev)
812 810 stopped = False
813 811
814 812 chain.reverse()
815 813 return chain, stopped
816 814
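
A minimal stand-in for the pure-Python chain walk above, using a plain rev -> deltabase mapping (toy data, generaldelta assumed; a rev whose base is itself stores a full snapshot):

    bases = {0: 0, 1: 0, 2: 1, 3: 3}

    def chain(rev):
        revs = []
        while bases[rev] != rev:
            revs.append(rev)
            rev = bases[rev]
        revs.append(rev)         # include the base, as when stoprev is unset
        revs.reverse()
        return revs

    assert chain(2) == [0, 1, 2]
    assert chain(3) == [3]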
817 815 def ancestors(self, revs, stoprev=0, inclusive=False):
818 816 """Generate the ancestors of 'revs' in reverse revision order.
819 817 Does not generate revs lower than stoprev.
820 818
821 819 See the documentation for ancestor.lazyancestors for more details."""
822 820
823 821 # first, make sure start revisions aren't filtered
824 822 revs = list(revs)
825 823 checkrev = self.node
826 824 for r in revs:
827 825 checkrev(r)
828 826 # and we're sure ancestors aren't filtered as well
829 827
830 828 if rustancestor is not None:
831 829 lazyancestors = rustancestor.LazyAncestors
832 830 arg = self.index
833 831 elif util.safehasattr(parsers, 'rustlazyancestors'):
834 832 lazyancestors = ancestor.rustlazyancestors
835 833 arg = self.index
836 834 else:
837 835 lazyancestors = ancestor.lazyancestors
838 836 arg = self._uncheckedparentrevs
839 837 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
840 838
841 839 def descendants(self, revs):
842 840 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
843 841
844 842 def findcommonmissing(self, common=None, heads=None):
845 843 """Return a tuple of the ancestors of common and the ancestors of heads
846 844 that are not ancestors of common. In revset terminology, we return the
847 845 tuple:
848 846
849 847 ::common, (::heads) - (::common)
850 848
851 849 The list is sorted by revision number, meaning it is
852 850 topologically sorted.
853 851
854 852 'heads' and 'common' are both lists of node IDs. If heads is
855 853 not supplied, uses all of the revlog's heads. If common is not
856 854 supplied, uses nullid."""
857 855 if common is None:
858 856 common = [nullid]
859 857 if heads is None:
860 858 heads = self.heads()
861 859
862 860 common = [self.rev(n) for n in common]
863 861 heads = [self.rev(n) for n in heads]
864 862
865 863 # we want the ancestors, but inclusive
866 864 class lazyset(object):
867 865 def __init__(self, lazyvalues):
868 866 self.addedvalues = set()
869 867 self.lazyvalues = lazyvalues
870 868
871 869 def __contains__(self, value):
872 870 return value in self.addedvalues or value in self.lazyvalues
873 871
874 872 def __iter__(self):
875 873 added = self.addedvalues
876 874 for r in added:
877 875 yield r
878 876 for r in self.lazyvalues:
879 877 if not r in added:
880 878 yield r
881 879
882 880 def add(self, value):
883 881 self.addedvalues.add(value)
884 882
885 883 def update(self, values):
886 884 self.addedvalues.update(values)
887 885
888 886 has = lazyset(self.ancestors(common))
889 887 has.add(nullrev)
890 888 has.update(common)
891 889
892 890 # take all ancestors from heads that aren't in has
893 891 missing = set()
894 892 visit = collections.deque(r for r in heads if r not in has)
895 893 while visit:
896 894 r = visit.popleft()
897 895 if r in missing:
898 896 continue
899 897 else:
900 898 missing.add(r)
901 899 for p in self.parentrevs(r):
902 900 if p not in has:
903 901 visit.append(p)
904 902 missing = list(missing)
905 903 missing.sort()
906 904 return has, [self.node(miss) for miss in missing]
907 905
908 906 def incrementalmissingrevs(self, common=None):
909 907 """Return an object that can be used to incrementally compute the
910 908 revision numbers of the ancestors of arbitrary sets that are not
911 909 ancestors of common. This is an ancestor.incrementalmissingancestors
912 910 object.
913 911
914 912 'common' is a list of revision numbers. If common is not supplied, uses
915 913 nullrev.
916 914 """
917 915 if common is None:
918 916 common = [nullrev]
919 917
920 918 if rustancestor is not None:
921 919 return rustancestor.MissingAncestors(self.index, common)
922 920 return ancestor.incrementalmissingancestors(self.parentrevs, common)
923 921
924 922 def findmissingrevs(self, common=None, heads=None):
925 923 """Return the revision numbers of the ancestors of heads that
926 924 are not ancestors of common.
927 925
928 926 More specifically, return a list of revision numbers corresponding to
929 927 nodes N such that every N satisfies the following constraints:
930 928
931 929 1. N is an ancestor of some node in 'heads'
932 930 2. N is not an ancestor of any node in 'common'
933 931
934 932 The list is sorted by revision number, meaning it is
935 933 topologically sorted.
936 934
937 935 'heads' and 'common' are both lists of revision numbers. If heads is
938 936 not supplied, uses all of the revlog's heads. If common is not
939 937 supplied, uses nullid."""
940 938 if common is None:
941 939 common = [nullrev]
942 940 if heads is None:
943 941 heads = self.headrevs()
944 942
945 943 inc = self.incrementalmissingrevs(common=common)
946 944 return inc.missingancestors(heads)
947 945
948 946 def findmissing(self, common=None, heads=None):
949 947 """Return the ancestors of heads that are not ancestors of common.
950 948
951 949 More specifically, return a list of nodes N such that every N
952 950 satisfies the following constraints:
953 951
954 952 1. N is an ancestor of some node in 'heads'
955 953 2. N is not an ancestor of any node in 'common'
956 954
957 955 The list is sorted by revision number, meaning it is
958 956 topologically sorted.
959 957
960 958 'heads' and 'common' are both lists of node IDs. If heads is
961 959 not supplied, uses all of the revlog's heads. If common is not
962 960 supplied, uses nullid."""
963 961 if common is None:
964 962 common = [nullid]
965 963 if heads is None:
966 964 heads = self.heads()
967 965
968 966 common = [self.rev(n) for n in common]
969 967 heads = [self.rev(n) for n in heads]
970 968
971 969 inc = self.incrementalmissingrevs(common=common)
972 970 return [self.node(r) for r in inc.missingancestors(heads)]
973 971
974 972 def nodesbetween(self, roots=None, heads=None):
975 973 """Return a topological path from 'roots' to 'heads'.
976 974
977 975 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
978 976 topologically sorted list of all nodes N that satisfy both of
979 977 these constraints:
980 978
981 979 1. N is a descendant of some node in 'roots'
982 980 2. N is an ancestor of some node in 'heads'
983 981
984 982 Every node is considered to be both a descendant and an ancestor
985 983 of itself, so every reachable node in 'roots' and 'heads' will be
986 984 included in 'nodes'.
987 985
988 986 'outroots' is the list of reachable nodes in 'roots', i.e., the
989 987 subset of 'roots' that is returned in 'nodes'. Likewise,
990 988 'outheads' is the subset of 'heads' that is also in 'nodes'.
991 989
992 990 'roots' and 'heads' are both lists of node IDs. If 'roots' is
993 991 unspecified, uses nullid as the only root. If 'heads' is
994 992 unspecified, uses list of all of the revlog's heads."""
995 993 nonodes = ([], [], [])
996 994 if roots is not None:
997 995 roots = list(roots)
998 996 if not roots:
999 997 return nonodes
1000 998 lowestrev = min([self.rev(n) for n in roots])
1001 999 else:
1002 1000 roots = [nullid] # Everybody's a descendant of nullid
1003 1001 lowestrev = nullrev
1004 1002 if (lowestrev == nullrev) and (heads is None):
1005 1003 # We want _all_ the nodes!
1006 1004 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1007 1005 if heads is None:
1008 1006 # All nodes are ancestors, so the latest ancestor is the last
1009 1007 # node.
1010 1008 highestrev = len(self) - 1
1011 1009 # Set ancestors to None to signal that every node is an ancestor.
1012 1010 ancestors = None
1013 1011 # Set heads to an empty dictionary for later discovery of heads
1014 1012 heads = {}
1015 1013 else:
1016 1014 heads = list(heads)
1017 1015 if not heads:
1018 1016 return nonodes
1019 1017 ancestors = set()
1020 1018 # Turn heads into a dictionary so we can remove 'fake' heads.
1021 1019 # Also, later we will be using it to filter out the heads we can't
1022 1020 # find from roots.
1023 1021 heads = dict.fromkeys(heads, False)
1024 1022 # Start at the top and keep marking parents until we're done.
1025 1023 nodestotag = set(heads)
1026 1024 # Remember where the top was so we can use it as a limit later.
1027 1025 highestrev = max([self.rev(n) for n in nodestotag])
1028 1026 while nodestotag:
1029 1027 # grab a node to tag
1030 1028 n = nodestotag.pop()
1031 1029 # Never tag nullid
1032 1030 if n == nullid:
1033 1031 continue
1034 1032 # A node's revision number represents its place in a
1035 1033 # topologically sorted list of nodes.
1036 1034 r = self.rev(n)
1037 1035 if r >= lowestrev:
1038 1036 if n not in ancestors:
1039 1037 # If we are possibly a descendant of one of the roots
1040 1038 # and we haven't already been marked as an ancestor
1041 1039 ancestors.add(n) # Mark as ancestor
1042 1040 # Add non-nullid parents to list of nodes to tag.
1043 1041 nodestotag.update([p for p in self.parents(n) if
1044 1042 p != nullid])
1045 1043 elif n in heads: # We've seen it before, is it a fake head?
1046 1044 # So it is, real heads should not be the ancestors of
1047 1045 # any other heads.
1048 1046 heads.pop(n)
1049 1047 if not ancestors:
1050 1048 return nonodes
1051 1049 # Now that we have our set of ancestors, we want to remove any
1052 1050 # roots that are not ancestors.
1053 1051
1054 1052 # If one of the roots was nullid, everything is included anyway.
1055 1053 if lowestrev > nullrev:
1056 1054 # But, since we weren't, let's recompute the lowest rev to not
1057 1055 # include roots that aren't ancestors.
1058 1056
1059 1057 # Filter out roots that aren't ancestors of heads
1060 1058 roots = [root for root in roots if root in ancestors]
1061 1059 # Recompute the lowest revision
1062 1060 if roots:
1063 1061 lowestrev = min([self.rev(root) for root in roots])
1064 1062 else:
1065 1063 # No more roots? Return empty list
1066 1064 return nonodes
1067 1065 else:
1068 1066 # We are descending from nullid, and don't need to care about
1069 1067 # any other roots.
1070 1068 lowestrev = nullrev
1071 1069 roots = [nullid]
1072 1070 # Transform our roots list into a set.
1073 1071 descendants = set(roots)
1074 1072 # Also, keep the original roots so we can filter out roots that aren't
1075 1073 # 'real' roots (i.e. are descended from other roots).
1076 1074 roots = descendants.copy()
1077 1075 # Our topologically sorted list of output nodes.
1078 1076 orderedout = []
1079 1077 # Don't start at nullid since we don't want nullid in our output list,
1080 1078 # and if nullid shows up in descendants, empty parents will look like
1081 1079 # they're descendants.
1082 1080 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1083 1081 n = self.node(r)
1084 1082 isdescendant = False
1085 1083 if lowestrev == nullrev: # Everybody is a descendant of nullid
1086 1084 isdescendant = True
1087 1085 elif n in descendants:
1088 1086 # n is already a descendant
1089 1087 isdescendant = True
1090 1088 # This check only needs to be done here because all the roots
1091 1089 # will start being marked as descendants before the loop.
1092 1090 if n in roots:
1093 1091 # If n was a root, check if it's a 'real' root.
1094 1092 p = tuple(self.parents(n))
1095 1093 # If any of its parents are descendants, it's not a root.
1096 1094 if (p[0] in descendants) or (p[1] in descendants):
1097 1095 roots.remove(n)
1098 1096 else:
1099 1097 p = tuple(self.parents(n))
1100 1098 # A node is a descendant if either of its parents are
1101 1099 # descendants. (We seeded the descendants set with the roots
1102 1100 # up there, remember?)
1103 1101 if (p[0] in descendants) or (p[1] in descendants):
1104 1102 descendants.add(n)
1105 1103 isdescendant = True
1106 1104 if isdescendant and ((ancestors is None) or (n in ancestors)):
1107 1105 # Only include nodes that are both descendants and ancestors.
1108 1106 orderedout.append(n)
1109 1107 if (ancestors is not None) and (n in heads):
1110 1108 # We're trying to figure out which heads are reachable
1111 1109 # from roots.
1112 1110 # Mark this head as having been reached
1113 1111 heads[n] = True
1114 1112 elif ancestors is None:
1115 1113 # Otherwise, we're trying to discover the heads.
1116 1114 # Assume this is a head because if it isn't, the next step
1117 1115 # will eventually remove it.
1118 1116 heads[n] = True
1119 1117 # But, obviously its parents aren't.
1120 1118 for p in self.parents(n):
1121 1119 heads.pop(p, None)
1122 1120 heads = [head for head, flag in heads.iteritems() if flag]
1123 1121 roots = list(roots)
1124 1122 assert orderedout
1125 1123 assert roots
1126 1124 assert heads
1127 1125 return (orderedout, roots, heads)
1128 1126
1129 1127 def headrevs(self, revs=None):
1130 1128 if revs is None:
1131 1129 try:
1132 1130 return self.index.headrevs()
1133 1131 except AttributeError:
1134 1132 return self._headrevs()
1135 1133 if rustdagop is not None:
1136 1134 return rustdagop.headrevs(self.index, revs)
1137 1135 return dagop.headrevs(revs, self._uncheckedparentrevs)
1138 1136
1139 1137 def computephases(self, roots):
1140 1138 return self.index.computephasesmapsets(roots)
1141 1139
1142 1140 def _headrevs(self):
1143 1141 count = len(self)
1144 1142 if not count:
1145 1143 return [nullrev]
1146 1144 # we won't iter over filtered revs so nobody is a head at start
1147 1145 ishead = [0] * (count + 1)
1148 1146 index = self.index
1149 1147 for r in self:
1150 1148 ishead[r] = 1 # I may be a head
1151 1149 e = index[r]
1152 1150 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1153 1151 return [r for r, val in enumerate(ishead) if val]
1154 1152
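
The same marking scheme on toy parent data (assumed for the sketch): every rev starts out as a head and is cleared as soon as it shows up as a parent, with the extra slot at the end absorbing nullrev (-1) lookups:

    parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    ishead = [0] * (len(parents) + 1)
    for r in sorted(parents):
        ishead[r] = 1
        p1, p2 = parents[r]
        ishead[p1] = ishead[p2] = 0    # ishead[-1] is the nullrev sink
    assert [r for r, v in enumerate(ishead) if v] == [1, 2]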
1155 1153 def heads(self, start=None, stop=None):
1156 1154 """return the list of all nodes that have no children
1157 1155
1158 1156 if start is specified, only heads that are descendants of
1159 1157 start will be returned
1160 1158 if stop is specified, it will consider all the revs from stop
1161 1159 as if they had no children
1162 1160 """
1163 1161 if start is None and stop is None:
1164 1162 if not len(self):
1165 1163 return [nullid]
1166 1164 return [self.node(r) for r in self.headrevs()]
1167 1165
1168 1166 if start is None:
1169 1167 start = nullrev
1170 1168 else:
1171 1169 start = self.rev(start)
1172 1170
1173 1171 stoprevs = set(self.rev(n) for n in stop or [])
1174 1172
1175 1173 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1176 1174 stoprevs=stoprevs)
1177 1175
1178 1176 return [self.node(rev) for rev in revs]
1179 1177
1180 1178 def children(self, node):
1181 1179 """find the children of a given node"""
1182 1180 c = []
1183 1181 p = self.rev(node)
1184 1182 for r in self.revs(start=p + 1):
1185 1183 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1186 1184 if prevs:
1187 1185 for pr in prevs:
1188 1186 if pr == p:
1189 1187 c.append(self.node(r))
1190 1188 elif p == nullrev:
1191 1189 c.append(self.node(r))
1192 1190 return c
1193 1191
1194 1192 def commonancestorsheads(self, a, b):
1195 1193 """calculate all the heads of the common ancestors of nodes a and b"""
1196 1194 a, b = self.rev(a), self.rev(b)
1197 1195 ancs = self._commonancestorsheads(a, b)
1198 1196 return pycompat.maplist(self.node, ancs)
1199 1197
1200 1198 def _commonancestorsheads(self, *revs):
1201 1199 """calculate all the heads of the common ancestors of revs"""
1202 1200 try:
1203 1201 ancs = self.index.commonancestorsheads(*revs)
1204 1202 except (AttributeError, OverflowError): # C implementation failed
1205 1203 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1206 1204 return ancs
1207 1205
1208 1206 def isancestor(self, a, b):
1209 1207 """return True if node a is an ancestor of node b
1210 1208
1211 1209 A revision is considered an ancestor of itself."""
1212 1210 a, b = self.rev(a), self.rev(b)
1213 1211 return self.isancestorrev(a, b)
1214 1212
1215 1213 def isancestorrev(self, a, b):
1216 1214 """return True if revision a is an ancestor of revision b
1217 1215
1218 1216 A revision is considered an ancestor of itself.
1219 1217
1220 1218 The implementation of this is trivial but the use of
1221 1219 reachableroots is not."""
1222 1220 if a == nullrev:
1223 1221 return True
1224 1222 elif a == b:
1225 1223 return True
1226 1224 elif a > b:
1227 1225 return False
1228 1226 return bool(self.reachableroots(a, [b], [a], includepath=False))
1229 1227
1230 1228 def reachableroots(self, minroot, heads, roots, includepath=False):
1231 1229 """return (heads(::<roots> and <roots>::<heads>))
1232 1230
1233 1231 If includepath is True, return (<roots>::<heads>)."""
1234 1232 try:
1235 1233 return self.index.reachableroots2(minroot, heads, roots,
1236 1234 includepath)
1237 1235 except AttributeError:
1238 1236 return dagop._reachablerootspure(self.parentrevs,
1239 1237 minroot, roots, heads, includepath)
1240 1238
1241 1239 def ancestor(self, a, b):
1242 1240 """calculate the "best" common ancestor of nodes a and b"""
1243 1241
1244 1242 a, b = self.rev(a), self.rev(b)
1245 1243 try:
1246 1244 ancs = self.index.ancestors(a, b)
1247 1245 except (AttributeError, OverflowError):
1248 1246 ancs = ancestor.ancestors(self.parentrevs, a, b)
1249 1247 if ancs:
1250 1248 # choose a consistent winner when there's a tie
1251 1249 return min(map(self.node, ancs))
1252 1250 return nullid
1253 1251
1254 1252 def _match(self, id):
1255 1253 if isinstance(id, int):
1256 1254 # rev
1257 1255 return self.node(id)
1258 1256 if len(id) == 20:
1259 1257 # possibly a binary node
1260 1258 # odds of a binary node being all hex in ASCII are 1 in 10**25
1261 1259 try:
1262 1260 node = id
1263 1261 self.rev(node) # quick search the index
1264 1262 return node
1265 1263 except error.LookupError:
1266 1264 pass # may be partial hex id
1267 1265 try:
1268 1266 # str(rev)
1269 1267 rev = int(id)
1270 1268 if "%d" % rev != id:
1271 1269 raise ValueError
1272 1270 if rev < 0:
1273 1271 rev = len(self) + rev
1274 1272 if rev < 0 or rev >= len(self):
1275 1273 raise ValueError
1276 1274 return self.node(rev)
1277 1275 except (ValueError, OverflowError):
1278 1276 pass
1279 1277 if len(id) == 40:
1280 1278 try:
1281 1279 # a full hex nodeid?
1282 1280 node = bin(id)
1283 1281 self.rev(node)
1284 1282 return node
1285 1283 except (TypeError, error.LookupError):
1286 1284 pass
1287 1285
1288 1286 def _partialmatch(self, id):
1290 1288 # we don't care about wdirfilenodeids as they should always be full hashes
1290 1288 maybewdir = wdirhex.startswith(id)
1291 1289 try:
1292 1290 partial = self.index.partialmatch(id)
1293 1291 if partial and self.hasnode(partial):
1294 1292 if maybewdir:
1295 1293 # single 'ff...' match in radix tree, ambiguous with wdir
1296 1294 raise error.RevlogError
1297 1295 return partial
1298 1296 if maybewdir:
1299 1297 # no 'ff...' match in radix tree, wdir identified
1300 1298 raise error.WdirUnsupported
1301 1299 return None
1302 1300 except error.RevlogError:
1303 1301 # parsers.c radix tree lookup gave multiple matches
1304 1302 # fast path: for unfiltered changelog, radix tree is accurate
1305 1303 if not getattr(self, 'filteredrevs', None):
1306 1304 raise error.AmbiguousPrefixLookupError(
1307 1305 id, self.indexfile, _('ambiguous identifier'))
1308 1306 # fall through to slow path that filters hidden revisions
1309 1307 except (AttributeError, ValueError):
1310 1308 # we are pure python, or key was too short to search radix tree
1311 1309 pass
1312 1310
1313 1311 if id in self._pcache:
1314 1312 return self._pcache[id]
1315 1313
1316 1314 if len(id) <= 40:
1317 1315 try:
1318 1316 # hex(node)[:...]
1319 1317 l = len(id) // 2 # grab an even number of digits
1320 1318 prefix = bin(id[:l * 2])
1321 1319 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1322 1320 nl = [n for n in nl if hex(n).startswith(id) and
1323 1321 self.hasnode(n)]
1324 1322 if nullhex.startswith(id):
1325 1323 nl.append(nullid)
1326 1324 if len(nl) > 0:
1327 1325 if len(nl) == 1 and not maybewdir:
1328 1326 self._pcache[id] = nl[0]
1329 1327 return nl[0]
1330 1328 raise error.AmbiguousPrefixLookupError(
1331 1329 id, self.indexfile, _('ambiguous identifier'))
1332 1330 if maybewdir:
1333 1331 raise error.WdirUnsupported
1334 1332 return None
1335 1333 except TypeError:
1336 1334 pass
1337 1335
1338 1336 def lookup(self, id):
1339 1337 """locate a node based on:
1340 1338 - revision number or str(revision number)
1341 1339 - nodeid or subset of hex nodeid
1342 1340 """
1343 1341 n = self._match(id)
1344 1342 if n is not None:
1345 1343 return n
1346 1344 n = self._partialmatch(id)
1347 1345 if n:
1348 1346 return n
1349 1347
1350 1348 raise error.LookupError(id, self.indexfile, _('no match found'))
1351 1349
1352 1350 def shortest(self, node, minlength=1):
1353 1351 """Find the shortest unambiguous prefix that matches node."""
1354 1352 def isvalid(prefix):
1355 1353 try:
1356 1354 matchednode = self._partialmatch(prefix)
1357 1355 except error.AmbiguousPrefixLookupError:
1358 1356 return False
1359 1357 except error.WdirUnsupported:
1360 1358 # single 'ff...' match
1361 1359 return True
1362 1360 if matchednode is None:
1363 1361 raise error.LookupError(node, self.indexfile, _('no node'))
1364 1362 return True
1365 1363
1366 1364 def maybewdir(prefix):
1367 1365 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1368 1366
1369 1367 hexnode = hex(node)
1370 1368
1371 1369 def disambiguate(hexnode, minlength):
1372 1370 """Disambiguate against wdirid."""
1373 1371 for length in range(minlength, 41):
1374 1372 prefix = hexnode[:length]
1375 1373 if not maybewdir(prefix):
1376 1374 return prefix
1377 1375
1378 1376 if not getattr(self, 'filteredrevs', None):
1379 1377 try:
1380 1378 length = max(self.index.shortest(node), minlength)
1381 1379 return disambiguate(hexnode, length)
1382 1380 except error.RevlogError:
1383 1381 if node != wdirid:
1384 1382 raise error.LookupError(node, self.indexfile, _('no node'))
1385 1383 except AttributeError:
1386 1384 # Fall through to pure code
1387 1385 pass
1388 1386
1389 1387 if node == wdirid:
1390 1388 for length in range(minlength, 41):
1391 1389 prefix = hexnode[:length]
1392 1390 if isvalid(prefix):
1393 1391 return prefix
1394 1392
1395 1393 for length in range(minlength, 41):
1396 1394 prefix = hexnode[:length]
1397 1395 if isvalid(prefix):
1398 1396 return disambiguate(hexnode, length)
1399 1397
1400 1398 def cmp(self, node, text):
1401 1399 """compare text with a given file revision
1402 1400
1403 1401 returns True if text is different than what is stored.
1404 1402 """
1405 1403 p1, p2 = self.parents(node)
1406 1404 return storageutil.hashrevisionsha1(text, p1, p2) != node
1407 1405
1408 1406 def _cachesegment(self, offset, data):
1409 1407 """Add a segment to the revlog cache.
1410 1408
1411 1409 Accepts an absolute offset and the data that is at that location.
1412 1410 """
1413 1411 o, d = self._chunkcache
1414 1412 # try to add to existing cache
1415 1413 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1416 1414 self._chunkcache = o, d + data
1417 1415 else:
1418 1416 self._chunkcache = offset, data
1419 1417
1420 1418 def _readsegment(self, offset, length, df=None):
1421 1419 """Load a segment of raw data from the revlog.
1422 1420
1423 1421 Accepts an absolute offset, length to read, and an optional existing
1424 1422 file handle to read from.
1425 1423
1426 1424 If an existing file handle is passed, it will be seeked and the
1427 1425 original seek position will NOT be restored.
1428 1426
1429 1427 Returns a str or buffer of raw byte data.
1430 1428
1431 1429 Raises if the requested number of bytes could not be read.
1432 1430 """
1433 1431 # Cache data both forward and backward around the requested
1434 1432 # data, in a fixed size window. This helps speed up operations
1435 1433 # involving reading the revlog backwards.
1436 1434 cachesize = self._chunkcachesize
1437 1435 realoffset = offset & ~(cachesize - 1)
1438 1436 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1439 1437 - realoffset)
1440 1438 with self._datareadfp(df) as df:
1441 1439 df.seek(realoffset)
1442 1440 d = df.read(reallength)
1443 1441
1444 1442 self._cachesegment(realoffset, d)
1445 1443 if offset != realoffset or reallength != length:
1446 1444 startoffset = offset - realoffset
1447 1445 if len(d) - startoffset < length:
1448 1446 raise error.RevlogError(
1449 1447 _('partial read of revlog %s; expected %d bytes from '
1450 1448 'offset %d, got %d') %
1451 1449 (self.indexfile if self._inline else self.datafile,
1452 1450 length, realoffset, len(d) - startoffset))
1453 1451
1454 1452 return util.buffer(d, startoffset, length)
1455 1453
1456 1454 if len(d) < length:
1457 1455 raise error.RevlogError(
1458 1456 _('partial read of revlog %s; expected %d bytes from offset '
1459 1457 '%d, got %d') %
1460 1458 (self.indexfile if self._inline else self.datafile,
1461 1459 length, offset, len(d)))
1462 1460
1463 1461 return d
1464 1462
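
The window arithmetic above rounds the request down and up to cache-size boundaries; a standalone check with the default 64 KiB cache (request values assumed):

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    assert realoffset == 65536                       # rounded down
    assert reallength == 65536                       # one aligned window
    assert realoffset <= offset
    assert realoffset + reallength >= offset + length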
1465 1463 def _getsegment(self, offset, length, df=None):
1466 1464 """Obtain a segment of raw data from the revlog.
1467 1465
1468 1466 Accepts an absolute offset, length of bytes to obtain, and an
1469 1467 optional file handle to the already-opened revlog. If the file
1470 1468 handle is used, its original seek position will not be preserved.
1471 1469
1472 1470 Requests for data may be returned from a cache.
1473 1471
1474 1472 Returns a str or a buffer instance of raw byte data.
1475 1473 """
1476 1474 o, d = self._chunkcache
1477 1475 l = len(d)
1478 1476
1479 1477 # is it in the cache?
1480 1478 cachestart = offset - o
1481 1479 cacheend = cachestart + length
1482 1480 if cachestart >= 0 and cacheend <= l:
1483 1481 if cachestart == 0 and cacheend == l:
1484 1482 return d # avoid a copy
1485 1483 return util.buffer(d, cachestart, cacheend - cachestart)
1486 1484
1487 1485 return self._readsegment(offset, length, df=df)
1488 1486
1489 1487 def _getsegmentforrevs(self, startrev, endrev, df=None):
1490 1488 """Obtain a segment of raw data corresponding to a range of revisions.
1491 1489
1492 1490 Accepts the start and end revisions and an optional already-open
1493 1491 file handle to be used for reading. If the file handle is read, its
1494 1492 seek position will not be preserved.
1495 1493
1496 1494 Requests for data may be satisfied by a cache.
1497 1495
1498 1496 Returns a 2-tuple of (offset, data) for the requested range of
1499 1497 revisions. Offset is the integer offset from the beginning of the
1500 1498 revlog and data is a str or buffer of the raw byte data.
1501 1499
1502 1500 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1503 1501 to determine where each revision's data begins and ends.
1504 1502 """
1505 1503 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1506 1504 # (functions are expensive).
1507 1505 index = self.index
1508 1506 istart = index[startrev]
1509 1507 start = int(istart[0] >> 16)
1510 1508 if startrev == endrev:
1511 1509 end = start + istart[1]
1512 1510 else:
1513 1511 iend = index[endrev]
1514 1512 end = int(iend[0] >> 16) + iend[1]
1515 1513
1516 1514 if self._inline:
1517 1515 start += (startrev + 1) * self._io.size
1518 1516 end += (endrev + 1) * self._io.size
1519 1517 length = end - start
1520 1518
1521 1519 return start, self._getsegment(start, length, df=df)
1522 1520
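
For inline revlogs the index records data offsets as if the fixed-size index entries were not interleaved with the data, hence the (rev + 1) * iosize correction above. A toy check (values assumed):

    iosize = 64                # bytes per v1 index entry
    datastart = 100            # offset as recorded in the index
    rev = 2
    # index entries for revs 0..rev (rev + 1 of them) precede this data
    assert datastart + (rev + 1) * iosize == 292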
1523 1521 def _chunk(self, rev, df=None):
1524 1522 """Obtain a single decompressed chunk for a revision.
1525 1523
1526 1524 Accepts an integer revision and an optional already-open file handle
1527 1525 to be used for reading. If used, the seek position of the file will not
1528 1526 be preserved.
1529 1527
1530 1528 Returns a str holding uncompressed data for the requested revision.
1531 1529 """
1532 1530 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1533 1531
1534 1532 def _chunks(self, revs, df=None, targetsize=None):
1535 1533 """Obtain decompressed chunks for the specified revisions.
1536 1534
1537 1535 Accepts an iterable of numeric revisions that are assumed to be in
1538 1536 ascending order. Also accepts an optional already-open file handle
1539 1537 to be used for reading. If used, the seek position of the file will
1540 1538 not be preserved.
1541 1539
1542 1540 This function is similar to calling ``self._chunk()`` multiple times,
1543 1541 but is faster.
1544 1542
1545 1543 Returns a list with decompressed data for each requested revision.
1546 1544 """
1547 1545 if not revs:
1548 1546 return []
1549 1547 start = self.start
1550 1548 length = self.length
1551 1549 inline = self._inline
1552 1550 iosize = self._io.size
1553 1551 buffer = util.buffer
1554 1552
1555 1553 l = []
1556 1554 ladd = l.append
1557 1555
1558 1556 if not self._withsparseread:
1559 1557 slicedchunks = (revs,)
1560 1558 else:
1561 1559 slicedchunks = deltautil.slicechunk(self, revs,
1562 1560 targetsize=targetsize)
1563 1561
1564 1562 for revschunk in slicedchunks:
1565 1563 firstrev = revschunk[0]
1566 1564 # Skip trailing revisions with empty diff
1567 1565 for lastrev in revschunk[::-1]:
1568 1566 if length(lastrev) != 0:
1569 1567 break
1570 1568
1571 1569 try:
1572 1570 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1573 1571 except OverflowError:
1574 1572 # issue4215 - we can't cache a run of chunks greater than
1575 1573 # 2G on Windows
1576 1574 return [self._chunk(rev, df=df) for rev in revschunk]
1577 1575
1578 1576 decomp = self.decompress
1579 1577 for rev in revschunk:
1580 1578 chunkstart = start(rev)
1581 1579 if inline:
1582 1580 chunkstart += (rev + 1) * iosize
1583 1581 chunklength = length(rev)
1584 1582 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1585 1583
1586 1584 return l
1587 1585
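The inner loop above carves one big disk read into per-revision chunks by slicing at start(rev) - offset, so only a single I/O happens per sliced group. A hedged sketch of the carving step alone (decompression omitted):

    def carvechunks(segmentoffset, segment, revmeta):
        # revmeta: (absolute start, length) per revision, mirroring
        # self.start(rev) / self.length(rev); a memoryview avoids
        # copying the large read buffer for every revision
        view = memoryview(segment)
        return [bytes(view[start - segmentoffset:
                           start - segmentoffset + length])
                for start, length in revmeta]

    chunks = carvechunks(10, b'AAABBCCCC', [(10, 3), (13, 2), (15, 4)])
    assert chunks == [b'AAA', b'BB', b'CCCC']
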
1588 1586 def _chunkclear(self):
1589 1587 """Clear the raw chunk cache."""
1590 1588 self._chunkcache = (0, '')
1591 1589
1592 1590 def deltaparent(self, rev):
1593 1591 """return deltaparent of the given revision"""
1594 1592 base = self.index[rev][3]
1595 1593 if base == rev:
1596 1594 return nullrev
1597 1595 elif self._generaldelta:
1598 1596 return base
1599 1597 else:
1600 1598 return rev - 1
1601 1599
1602 1600 def issnapshot(self, rev):
1603 1601 """tells whether rev is a snapshot
1604 1602 """
1605 1603 if not self._sparserevlog:
1606 1604 return self.deltaparent(rev) == nullrev
1607 1605 elif util.safehasattr(self.index, 'issnapshot'):
1608 1606 # directly assign the method to cache the testing and access
1609 1607 self.issnapshot = self.index.issnapshot
1610 1608 return self.issnapshot(rev)
1611 1609 if rev == nullrev:
1612 1610 return True
1613 1611 entry = self.index[rev]
1614 1612 base = entry[3]
1615 1613 if base == rev:
1616 1614 return True
1617 1615 if base == nullrev:
1618 1616 return True
1619 1617 p1 = entry[5]
1620 1618 p2 = entry[6]
1621 1619 if base == p1 or base == p2:
1622 1620 return False
1623 1621 return self.issnapshot(base)
1624 1622
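When the index lacks a native issnapshot, the pure-Python fallback above classifies a revision by walking delta bases: full texts (base equal to rev or null) are snapshots, deltas against a parent are not, and anything else is an intermediate snapshot exactly when its base is one. The same rules restated over a toy index, as a hedged sketch:

    NULLREV = -1

    def issnapshot(index, rev):
        # index maps rev -> (deltabase, p1, p2), mirroring entry[3],
        # entry[5] and entry[6] in the real index tuples
        if rev == NULLREV:
            return True
        base, p1, p2 = index[rev]
        if base == rev or base == NULLREV:
            return True          # stored as a full text
        if base in (p1, p2):
            return False         # ordinary delta against a parent
        return issnapshot(index, base)  # intermediate snapshot chain

    toy = {0: (0, NULLREV, NULLREV),  # full snapshot
           1: (0, 0, NULLREV),        # delta against its parent
           2: (0, 1, NULLREV)}        # delta against non-parent rev 0
    assert issnapshot(toy, 0) and not issnapshot(toy, 1)
    assert issnapshot(toy, 2)
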
1625 1623 def snapshotdepth(self, rev):
1626 1624 """number of snapshot in the chain before this one"""
1627 1625 if not self.issnapshot(rev):
1628 1626 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1629 1627 return len(self._deltachain(rev)[0]) - 1
1630 1628
1631 1629 def revdiff(self, rev1, rev2):
1632 1630 """return or calculate a delta between two revisions
1633 1631
1634 1632 The delta calculated is in binary form and is intended to be written to
1635 1633 revlog data directly. So this function needs raw revision data.
1636 1634 """
1637 1635 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1638 1636 return bytes(self._chunk(rev2))
1639 1637
1640 1638 return mdiff.textdiff(self.revision(rev1, raw=True),
1641 1639 self.revision(rev2, raw=True))
1642 1640
1643 1641 def revision(self, nodeorrev, _df=None, raw=False):
1644 1642 """return an uncompressed revision of a given node or revision
1645 1643 number.
1646 1644
1647 1645 _df - an existing file handle to read from. (internal-only)
1648 1646 raw - an optional argument specifying if the revision data is to be
1649 1647 treated as raw data when applying flag transforms. 'raw' should be set
1650 1648 to True when generating changegroups or in debug commands.
1651 1649 """
1652 1650 return self._revisiondata(nodeorrev, _df, raw=raw)
1653 1651
1654 1652 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1655 1653 if isinstance(nodeorrev, int):
1656 1654 rev = nodeorrev
1657 1655 node = self.node(rev)
1658 1656 else:
1659 1657 node = nodeorrev
1660 1658 rev = None
1661 1659
1662 1660 cachedrev = None
1663 1661 flags = None
1664 1662 rawtext = None
1665 1663 if node == nullid:
1666 1664 return ""
1667 1665 if self._revisioncache:
1668 1666 if self._revisioncache[0] == node:
1669 1667 # _cache only stores rawtext
1670 1668 if raw:
1671 1669 return self._revisioncache[2]
1672 1670 # duplicated, but good for perf
1673 1671 if rev is None:
1674 1672 rev = self.rev(node)
1675 1673 if flags is None:
1676 1674 flags = self.flags(rev)
1677 1675 # no extra flags set, no flag processor runs, text = rawtext
1678 1676 if flags == REVIDX_DEFAULT_FLAGS:
1679 1677 return self._revisioncache[2]
1680 1678 # rawtext is reusable. need to run flag processor
1681 1679 rawtext = self._revisioncache[2]
1682 1680
1683 1681 cachedrev = self._revisioncache[1]
1684 1682
1685 1683 # look up what we need to read
1686 1684 if rawtext is None:
1687 1685 if rev is None:
1688 1686 rev = self.rev(node)
1689 1687
1690 1688 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1691 1689 if stopped:
1692 1690 rawtext = self._revisioncache[2]
1693 1691
1694 1692 # drop cache to save memory
1695 1693 self._revisioncache = None
1696 1694
1697 1695 targetsize = None
1698 1696 rawsize = self.index[rev][2]
1699 1697 if 0 <= rawsize:
1700 1698 targetsize = 4 * rawsize
1701 1699
1702 1700 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1703 1701 if rawtext is None:
1704 1702 rawtext = bytes(bins[0])
1705 1703 bins = bins[1:]
1706 1704
1707 1705 rawtext = mdiff.patches(rawtext, bins)
1708 1706 self._revisioncache = (node, rev, rawtext)
1709 1707
1710 1708 if flags is None:
1711 1709 if rev is None:
1712 1710 rev = self.rev(node)
1713 1711 flags = self.flags(rev)
1714 1712
1715 1713 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1716 1714 if validatehash:
1717 1715 self.checkhash(text, node, rev=rev)
1718 1716
1719 1717 return text
1720 1718
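Stripped of caching, _revisiondata is a three-stage pipeline: apply the delta chain onto its base snapshot, run read-direction flag processors, then verify the hash unless a processor vetoed it. A hedged, dependency-injected restatement (the callables stand in for mdiff.patches, _processflags and checkhash):

    def rebuild(chunks, applypatches, processflags, checkhash,
                node, flags, raw=False):
        # chunks[0] is the base snapshot, the rest are binary deltas
        rawtext = applypatches(chunks[0], chunks[1:])
        text, validate = processflags(rawtext, flags, 'read', raw=raw)
        if validate:
            checkhash(text, node)  # hash covers the processed text
        return text

    # trivial stubs: no deltas, a processor that skips hash validation
    out = rebuild([b'full text'], lambda base, deltas: base,
                  lambda t, f, op, raw: (t, False), None, b'n', 0)
    assert out == b'full text'
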
1721 1719 def rawdata(self, nodeorrev, _df=None, raw=False):
1722 1720 """return an uncompressed raw data of a given node or revision number.
1723 1721
1724 1722 _df - an existing file handle to read from. (internal-only)
1725 1723 """
1726 1724 return self._revisiondata(nodeorrev, _df, raw=True)
1727 1725
1728 1726 def hash(self, text, p1, p2):
1729 1727 """Compute a node hash.
1730 1728
1731 1729 Available as a function so that subclasses can replace the hash
1732 1730 as needed.
1733 1731 """
1734 1732 return storageutil.hashrevisionsha1(text, p1, p2)
1735 1733
1736 1734 def _processflags(self, text, flags, operation, raw=False):
1737 1735 """Inspect revision data flags and applies transforms defined by
1738 1736 registered flag processors.
1739 1737
1740 1738 ``text`` - the revision data to process
1741 1739 ``flags`` - the revision flags
1742 1740 ``operation`` - the operation being performed (read or write)
1743 1741 ``raw`` - an optional argument describing if the raw transform should be
1744 1742 applied.
1745 1743
1746 1744 This method processes the flags in the order (or reverse order if
1747 1745 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1748 1746 flag processors registered for present flags. The order of flags defined
1749 1747 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1750 1748
1751 1749 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1752 1750 processed text and ``validatehash`` is a bool indicating whether the
1753 1751 returned text should be checked for hash integrity.
1754 1752
1755 1753 Note: If the ``raw`` argument is set, it has precedence over the
1756 1754 operation and will only update the value of ``validatehash``.
1757 1755 """
1758 1756 # fast path: no flag processors will run
1759 1757 if flags == 0:
1760 1758 return text, True
1761 1759 if operation not in ('read', 'write'):
1762 1760 raise error.ProgrammingError(_("invalid '%s' operation") %
1763 1761 operation)
1764 1762 # Check all flags are known.
1765 if flags & ~REVIDX_KNOWN_FLAGS:
1763 if flags & ~flagutil.REVIDX_KNOWN_FLAGS:
1766 1764 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1767 (flags & ~REVIDX_KNOWN_FLAGS))
1765 (flags & ~flagutil.REVIDX_KNOWN_FLAGS))
1768 1766 validatehash = True
1769 1767 # Depending on the operation (read or write), the order might be
1770 1768 # reversed due to non-commutative transforms.
1771 1769 orderedflags = REVIDX_FLAGS_ORDER
1772 1770 if operation == 'write':
1773 1771 orderedflags = reversed(orderedflags)
1774 1772
1775 1773 for flag in orderedflags:
1776 1774 # If a flagprocessor has been registered for a known flag, apply the
1777 1775 # related operation transform and update result tuple.
1778 1776 if flag & flags:
1779 1777 vhash = True
1780 1778
1781 1779 if flag not in self._flagprocessors:
1782 1780 message = _("missing processor for flag '%#x'") % (flag)
1783 1781 raise error.RevlogError(message)
1784 1782
1785 1783 processor = self._flagprocessors[flag]
1786 1784 if processor is not None:
1787 1785 readtransform, writetransform, rawtransform = processor
1788 1786
1789 1787 if raw:
1790 1788 vhash = rawtransform(self, text)
1791 1789 elif operation == 'read':
1792 1790 text, vhash = readtransform(self, text)
1793 1791 else: # write operation
1794 1792 text, vhash = writetransform(self, text)
1795 1793 validatehash = validatehash and vhash
1796 1794
1797 1795 return text, validatehash
1798 1796
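This method is the main consumer of the relocated constant: unknown bits are rejected against flagutil.REVIDX_KNOWN_FLAGS, then each known flag's processor runs in REVIDX_FLAGS_ORDER (reversed for writes, since the transforms do not commute). A hedged miniature with one toy flag and processor, registered nowhere:

    REVIDX_TOY = 1 << 4              # hypothetical flag bit
    KNOWN = REVIDX_TOY               # stands in for REVIDX_KNOWN_FLAGS
    ORDER = [REVIDX_TOY]
    processors = {REVIDX_TOY: (lambda t: (t.upper(), True),   # read
                               lambda t: (t.lower(), False),  # write
                               lambda t: False)}              # raw

    def process(text, flags, operation):
        if flags & ~KNOWN:
            raise ValueError('incompatible revision flag %#x'
                             % (flags & ~KNOWN))
        order = reversed(ORDER) if operation == 'write' else ORDER
        validate = True
        for flag in order:
            if flag & flags:
                read, write, _raw = processors[flag]
                text, vhash = (read if operation == 'read'
                               else write)(text)
                validate = validate and vhash
        return text, validate

    assert process(b'abc', REVIDX_TOY, 'read') == (b'ABC', True)
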
1799 1797 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1800 1798 """Check node hash integrity.
1801 1799
1802 1800 Available as a function so that subclasses can extend hash mismatch
1803 1801 behaviors as needed.
1804 1802 """
1805 1803 try:
1806 1804 if p1 is None and p2 is None:
1807 1805 p1, p2 = self.parents(node)
1808 1806 if node != self.hash(text, p1, p2):
1809 1807 # Clear the revision cache on hash failure. The revision cache
1810 1808 # only stores the raw revision and clearing the cache does have
1811 1809 # the side-effect that we won't have a cache hit when the raw
1812 1810 # revision data is accessed. But this case should be rare and
1813 1811 # it is extra work to teach the cache about the hash
1814 1812 # verification state.
1815 1813 if self._revisioncache and self._revisioncache[0] == node:
1816 1814 self._revisioncache = None
1817 1815
1818 1816 revornode = rev
1819 1817 if revornode is None:
1820 1818 revornode = templatefilters.short(hex(node))
1821 1819 raise error.RevlogError(_("integrity check failed on %s:%s")
1822 1820 % (self.indexfile, pycompat.bytestr(revornode)))
1823 1821 except error.RevlogError:
1824 1822 if self._censorable and storageutil.iscensoredtext(text):
1825 1823 raise error.CensoredNodeError(self.indexfile, node, text)
1826 1824 raise
1827 1825
1828 1826 def _enforceinlinesize(self, tr, fp=None):
1829 1827 """Check if the revlog is too big for inline and convert if so.
1830 1828
1831 1829 This should be called after revisions are added to the revlog. If the
1832 1830 revlog has grown too large to be an inline revlog, it will convert it
1833 1831 to use multiple index and data files.
1834 1832 """
1835 1833 tiprev = len(self) - 1
1836 1834 if (not self._inline or
1837 1835 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1838 1836 return
1839 1837
1840 1838 trinfo = tr.find(self.indexfile)
1841 1839 if trinfo is None:
1842 1840 raise error.RevlogError(_("%s not found in the transaction")
1843 1841 % self.indexfile)
1844 1842
1845 1843 trindex = trinfo[2]
1846 1844 if trindex is not None:
1847 1845 dataoff = self.start(trindex)
1848 1846 else:
1849 1847 # revlog was stripped at start of transaction, use all leftover data
1850 1848 trindex = len(self) - 1
1851 1849 dataoff = self.end(tiprev)
1852 1850
1853 1851 tr.add(self.datafile, dataoff)
1854 1852
1855 1853 if fp:
1856 1854 fp.flush()
1857 1855 fp.close()
1858 1856 # We can't use the cached file handle after close(). So prevent
1859 1857 # its usage.
1860 1858 self._writinghandles = None
1861 1859
1862 1860 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1863 1861 for r in self:
1864 1862 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1865 1863
1866 1864 with self._indexfp('w') as fp:
1867 1865 self.version &= ~FLAG_INLINE_DATA
1868 1866 self._inline = False
1869 1867 io = self._io
1870 1868 for i in self:
1871 1869 e = io.packentry(self.index[i], self.node, self.version, i)
1872 1870 fp.write(e)
1873 1871
1874 1872 # the temp file replaces the real index when we exit the context
1875 1873 # manager
1876 1874
1877 1875 tr.replace(self.indexfile, trindex * self._io.size)
1878 1876 self._chunkclear()
1879 1877
1880 1878 def _nodeduplicatecallback(self, transaction, node):
1881 1879 """called when trying to add a node already stored.
1882 1880 """
1883 1881
1884 1882 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1885 1883 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1886 1884 """add a revision to the log
1887 1885
1888 1886 text - the revision data to add
1889 1887 transaction - the transaction object used for rollback
1890 1888 link - the linkrev data to add
1891 1889 p1, p2 - the parent nodeids of the revision
1892 1890 cachedelta - an optional precomputed delta
1893 1891 node - nodeid of revision; typically node is not specified, and it is
1894 1892 computed by default as hash(text, p1, p2), however subclasses might
1895 1893 use a different hashing method (and override checkhash() in that case)
1896 1894 flags - the known flags to set on the revision
1897 1895 deltacomputer - an optional deltacomputer instance shared between
1898 1896 multiple calls
1899 1897 """
1900 1898 if link == nullrev:
1901 1899 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1902 1900 % self.indexfile)
1903 1901
1904 1902 if flags:
1905 1903 node = node or self.hash(text, p1, p2)
1906 1904
1907 1905 rawtext, validatehash = self._processflags(text, flags, 'write')
1908 1906
1909 1907 # If the flag processor modifies the revision data, ignore any provided
1910 1908 # cachedelta.
1911 1909 if rawtext != text:
1912 1910 cachedelta = None
1913 1911
1914 1912 if len(rawtext) > _maxentrysize:
1915 1913 raise error.RevlogError(
1916 1914 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1917 1915 % (self.indexfile, len(rawtext)))
1918 1916
1919 1917 node = node or self.hash(rawtext, p1, p2)
1920 1918 if node in self.nodemap:
1921 1919 return node
1922 1920
1923 1921 if validatehash:
1924 1922 self.checkhash(rawtext, node, p1=p1, p2=p2)
1925 1923
1926 1924 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1927 1925 flags, cachedelta=cachedelta,
1928 1926 deltacomputer=deltacomputer)
1929 1927
1930 1928 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1931 1929 cachedelta=None, deltacomputer=None):
1932 1930 """add a raw revision with known flags, node and parents
1933 1931 useful when reusing a revision not stored in this revlog (ex: received
1934 1932 over the wire, or read from an external bundle).
1935 1933 """
1936 1934 dfh = None
1937 1935 if not self._inline:
1938 1936 dfh = self._datafp("a+")
1939 1937 ifh = self._indexfp("a+")
1940 1938 try:
1941 1939 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1942 1940 flags, cachedelta, ifh, dfh,
1943 1941 deltacomputer=deltacomputer)
1944 1942 finally:
1945 1943 if dfh:
1946 1944 dfh.close()
1947 1945 ifh.close()
1948 1946
1949 1947 def compress(self, data):
1950 1948 """Generate a possibly-compressed representation of data."""
1951 1949 if not data:
1952 1950 return '', data
1953 1951
1954 1952 compressed = self._compressor.compress(data)
1955 1953
1956 1954 if compressed:
1957 1955 # The revlog compressor added the header in the returned data.
1958 1956 return '', compressed
1959 1957
1960 1958 if data[0:1] == '\0':
1961 1959 return '', data
1962 1960 return 'u', data
1963 1961
1964 1962 def decompress(self, data):
1965 1963 """Decompress a revlog chunk.
1966 1964
1967 1965 The chunk is expected to begin with a header identifying the
1968 1966 format type so it can be routed to an appropriate decompressor.
1969 1967 """
1970 1968 if not data:
1971 1969 return data
1972 1970
1973 1971 # Revlogs are read much more frequently than they are written and many
1974 1972 # chunks only take microseconds to decompress, so performance is
1975 1973 # important here.
1976 1974 #
1977 1975 # We can make a few assumptions about revlogs:
1978 1976 #
1979 1977 # 1) the majority of chunks will be compressed (as opposed to inline
1980 1978 # raw data).
1981 1979 # 2) decompressing *any* data will likely be at least 10x slower than
1982 1980 # returning raw inline data.
1983 1981 # 3) we want to prioritize common and officially supported compression
1984 1982 # engines
1985 1983 #
1986 1984 # It follows that we want to optimize for "decompress compressed data
1987 1985 # when encoded with common and officially supported compression engines"
1988 1986 # case over "raw data" and "data encoded by less common or non-official
1989 1987 # compression engines." That is why we have the inline lookup first
1990 1988 # followed by the compengines lookup.
1991 1989 #
1992 1990 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1993 1991 # compressed chunks. And this matters for changelog and manifest reads.
1994 1992 t = data[0:1]
1995 1993
1996 1994 if t == 'x':
1997 1995 try:
1998 1996 return _zlibdecompress(data)
1999 1997 except zlib.error as e:
2000 1998 raise error.RevlogError(_('revlog decompress error: %s') %
2001 1999 stringutil.forcebytestr(e))
2002 2000 # '\0' is more common than 'u' so it goes first.
2003 2001 elif t == '\0':
2004 2002 return data
2005 2003 elif t == 'u':
2006 2004 return util.buffer(data, 1)
2007 2005
2008 2006 try:
2009 2007 compressor = self._decompressors[t]
2010 2008 except KeyError:
2011 2009 try:
2012 2010 engine = util.compengines.forrevlogheader(t)
2013 2011 compressor = engine.revlogcompressor(self._compengineopts)
2014 2012 self._decompressors[t] = compressor
2015 2013 except KeyError:
2016 2014 raise error.RevlogError(_('unknown compression type %r') % t)
2017 2015
2018 2016 return compressor.decompress(data)
2019 2017
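The one-byte dispatch above boils down to: a leading NUL means the chunk is stored raw as-is, 'u' marks raw data that had to be prefixed, 'x' is the first byte of a zlib stream, and anything else names a pluggable compression engine. A hedged sketch covering only the built-in cases:

    import zlib

    def decompresschunk(data):
        if not data:
            return data
        t = data[0:1]
        if t == b'\0':       # raw chunk; the NUL is part of the data
            return data
        if t == b'u':        # explicit marker prefixed onto raw data
            return data[1:]
        if t == b'x':        # zlib streams start with 0x78 ('x')
            return zlib.decompress(data)
        raise ValueError('unknown compression type %r' % t)

    assert decompresschunk(b'uhello') == b'hello'
    assert decompresschunk(zlib.compress(b'hello')) == b'hello'
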
2020 2018 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2021 2019 cachedelta, ifh, dfh, alwayscache=False,
2022 2020 deltacomputer=None):
2023 2021 """internal function to add revisions to the log
2024 2022
2025 2023 see addrevision for argument descriptions.
2026 2024
2027 2025 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2028 2026
2029 2027 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2030 2028 be used.
2031 2029
2032 2030 invariants:
2033 2031 - rawtext is optional (can be None); if not set, cachedelta must be set.
2034 2032 if both are set, they must correspond to each other.
2035 2033 """
2036 2034 if node == nullid:
2037 2035 raise error.RevlogError(_("%s: attempt to add null revision") %
2038 2036 self.indexfile)
2039 2037 if node == wdirid or node in wdirfilenodeids:
2040 2038 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2041 2039 self.indexfile)
2042 2040
2043 2041 if self._inline:
2044 2042 fh = ifh
2045 2043 else:
2046 2044 fh = dfh
2047 2045
2048 2046 btext = [rawtext]
2049 2047
2050 2048 curr = len(self)
2051 2049 prev = curr - 1
2052 2050 offset = self.end(prev)
2053 2051 p1r, p2r = self.rev(p1), self.rev(p2)
2054 2052
2055 2053 # full versions are inserted when the needed deltas
2056 2054 # become comparable to the uncompressed text
2057 2055 if rawtext is None:
2058 2056 # need rawtext size, before changed by flag processors, which is
2059 2057 # the non-raw size. use revlog explicitly to avoid filelog's extra
2060 2058 # logic that might remove metadata size.
2061 2059 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2062 2060 cachedelta[1])
2063 2061 else:
2064 2062 textlen = len(rawtext)
2065 2063
2066 2064 if deltacomputer is None:
2067 2065 deltacomputer = deltautil.deltacomputer(self)
2068 2066
2069 2067 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2070 2068
2071 2069 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2072 2070
2073 2071 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2074 2072 deltainfo.base, link, p1r, p2r, node)
2075 2073 self.index.append(e)
2076 2074 self.nodemap[node] = curr
2077 2075
2078 2076 # Reset the pure node cache start lookup offset to account for new
2079 2077 # revision.
2080 2078 if self._nodepos is not None:
2081 2079 self._nodepos = curr
2082 2080
2083 2081 entry = self._io.packentry(e, self.node, self.version, curr)
2084 2082 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2085 2083 link, offset)
2086 2084
2087 2085 rawtext = btext[0]
2088 2086
2089 2087 if alwayscache and rawtext is None:
2090 2088 rawtext = deltacomputer.buildtext(revinfo, fh)
2091 2089
2092 2090 if type(rawtext) == bytes: # only accept immutable objects
2093 2091 self._revisioncache = (node, curr, rawtext)
2094 2092 self._chainbasecache[curr] = deltainfo.chainbase
2095 2093 return node
2096 2094
2097 2095 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2098 2096 # Files opened in a+ mode have inconsistent behavior on various
2099 2097 # platforms. Windows requires that a file positioning call be made
2100 2098 # when the file handle transitions between reads and writes. See
2101 2099 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2102 2100 # platforms, Python or the platform itself can be buggy. Some versions
2103 2101 # of Solaris have been observed to not append at the end of the file
2104 2102 # if the file was seeked to before the end. See issue4943 for more.
2105 2103 #
2106 2104 # We work around this issue by inserting a seek() before writing.
2107 2105 # Note: This is likely not necessary on Python 3. However, because
2108 2106 # the file handle is reused for reads and those reads may move its
2109 2107 # position, so we need to be careful before changing this.
2110 2108 ifh.seek(0, os.SEEK_END)
2111 2109 if dfh:
2112 2110 dfh.seek(0, os.SEEK_END)
2113 2111
2114 2112 curr = len(self) - 1
2115 2113 if not self._inline:
2116 2114 transaction.add(self.datafile, offset)
2117 2115 transaction.add(self.indexfile, curr * len(entry))
2118 2116 if data[0]:
2119 2117 dfh.write(data[0])
2120 2118 dfh.write(data[1])
2121 2119 ifh.write(entry)
2122 2120 else:
2123 2121 offset += curr * self._io.size
2124 2122 transaction.add(self.indexfile, offset, curr)
2125 2123 ifh.write(entry)
2126 2124 ifh.write(data[0])
2127 2125 ifh.write(data[1])
2128 2126 self._enforceinlinesize(transaction, ifh)
2129 2127
2130 2128 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2131 2129 """
2132 2130 add a delta group
2133 2131
2134 2132 given a set of deltas, add them to the revision log. the
2135 2133 first delta is against its parent, which should be in our
2136 2134 log, the rest are against the previous delta.
2137 2135
2138 2136 If ``addrevisioncb`` is defined, it will be called with arguments of
2139 2137 this revlog and the node that was added.
2140 2138 """
2141 2139
2142 2140 if self._writinghandles:
2143 2141 raise error.ProgrammingError('cannot nest addgroup() calls')
2144 2142
2145 2143 nodes = []
2146 2144
2147 2145 r = len(self)
2148 2146 end = 0
2149 2147 if r:
2150 2148 end = self.end(r - 1)
2151 2149 ifh = self._indexfp("a+")
2152 2150 isize = r * self._io.size
2153 2151 if self._inline:
2154 2152 transaction.add(self.indexfile, end + isize, r)
2155 2153 dfh = None
2156 2154 else:
2157 2155 transaction.add(self.indexfile, isize, r)
2158 2156 transaction.add(self.datafile, end)
2159 2157 dfh = self._datafp("a+")
2160 2158 def flush():
2161 2159 if dfh:
2162 2160 dfh.flush()
2163 2161 ifh.flush()
2164 2162
2165 2163 self._writinghandles = (ifh, dfh)
2166 2164
2167 2165 try:
2168 2166 deltacomputer = deltautil.deltacomputer(self)
2169 2167 # loop through our set of deltas
2170 2168 for data in deltas:
2171 2169 node, p1, p2, linknode, deltabase, delta, flags = data
2172 2170 link = linkmapper(linknode)
2173 2171 flags = flags or REVIDX_DEFAULT_FLAGS
2174 2172
2175 2173 nodes.append(node)
2176 2174
2177 2175 if node in self.nodemap:
2178 2176 self._nodeduplicatecallback(transaction, node)
2179 2177 # this can happen if two branches make the same change
2180 2178 continue
2181 2179
2182 2180 for p in (p1, p2):
2183 2181 if p not in self.nodemap:
2184 2182 raise error.LookupError(p, self.indexfile,
2185 2183 _('unknown parent'))
2186 2184
2187 2185 if deltabase not in self.nodemap:
2188 2186 raise error.LookupError(deltabase, self.indexfile,
2189 2187 _('unknown delta base'))
2190 2188
2191 2189 baserev = self.rev(deltabase)
2192 2190
2193 2191 if baserev != nullrev and self.iscensored(baserev):
2194 2192 # if base is censored, delta must be full replacement in a
2195 2193 # single patch operation
2196 2194 hlen = struct.calcsize(">lll")
2197 2195 oldlen = self.rawsize(baserev)
2198 2196 newlen = len(delta) - hlen
2199 2197 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2200 2198 raise error.CensoredBaseError(self.indexfile,
2201 2199 self.node(baserev))
2202 2200
2203 2201 if not flags and self._peek_iscensored(baserev, delta, flush):
2204 2202 flags |= REVIDX_ISCENSORED
2205 2203
2206 2204 # We assume consumers of addrevisioncb will want to retrieve
2207 2205 # the added revision, which will require a call to
2208 2206 # revision(). revision() will fast path if there is a cache
2209 2207 # hit. So, we tell _addrevision() to always cache in this case.
2210 2208 # We're only using addgroup() in the context of changegroup
2211 2209 # generation so the revision data can always be handled as raw
2212 2210 # by the flagprocessor.
2213 2211 self._addrevision(node, None, transaction, link,
2214 2212 p1, p2, flags, (baserev, delta),
2215 2213 ifh, dfh,
2216 2214 alwayscache=bool(addrevisioncb),
2217 2215 deltacomputer=deltacomputer)
2218 2216
2219 2217 if addrevisioncb:
2220 2218 addrevisioncb(self, node)
2221 2219
2222 2220 if not dfh and not self._inline:
2223 2221 # addrevision switched from inline to conventional
2224 2222 # reopen the index
2225 2223 ifh.close()
2226 2224 dfh = self._datafp("a+")
2227 2225 ifh = self._indexfp("a+")
2228 2226 self._writinghandles = (ifh, dfh)
2229 2227 finally:
2230 2228 self._writinghandles = None
2231 2229
2232 2230 if dfh:
2233 2231 dfh.close()
2234 2232 ifh.close()
2235 2233
2236 2234 return nodes
2237 2235
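Before storing each delta, addgroup checks that both parents and the delta base are already known, allowing later deltas to build on nodes added earlier in the same group. A hedged sketch of just that validation pass:

    def checkgroup(deltas, knownnodes):
        # knownnodes stands in for self.nodemap; each delta is the
        # 7-tuple unpacked in the loop above
        seen = set(knownnodes)
        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            for p in (p1, p2):
                if p not in seen:
                    raise LookupError('unknown parent %r' % p)
            if deltabase not in seen:
                raise LookupError('unknown delta base %r' % deltabase)
            seen.add(node)  # later deltas may use this node as a base

    checkgroup([(b'n1', b'p', b'p', b'l', b'p', b'', 0)], {b'p'})
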
2238 2236 def iscensored(self, rev):
2239 2237 """Check if a file revision is censored."""
2240 2238 if not self._censorable:
2241 2239 return False
2242 2240
2243 2241 return self.flags(rev) & REVIDX_ISCENSORED
2244 2242
2245 2243 def _peek_iscensored(self, baserev, delta, flush):
2246 2244 """Quickly check if a delta produces a censored revision."""
2247 2245 if not self._censorable:
2248 2246 return False
2249 2247
2250 2248 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2251 2249
2252 2250 def getstrippoint(self, minlink):
2253 2251 """find the minimum rev that must be stripped to strip the linkrev
2254 2252
2255 2253 Returns a tuple containing the minimum rev and a set of all revs that
2256 2254 have linkrevs that will be broken by this strip.
2257 2255 """
2258 2256 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2259 2257 self.headrevs(),
2260 2258 self.linkrev, self.parentrevs)
2261 2259
2262 2260 def strip(self, minlink, transaction):
2263 2261 """truncate the revlog on the first revision with a linkrev >= minlink
2264 2262
2265 2263 This function is called when we're stripping revision minlink and
2266 2264 its descendants from the repository.
2267 2265
2268 2266 We have to remove all revisions with linkrev >= minlink, because
2269 2267 the equivalent changelog revisions will be renumbered after the
2270 2268 strip.
2271 2269
2272 2270 So we truncate the revlog on the first of these revisions, and
2273 2271 trust that the caller has saved the revisions that shouldn't be
2274 2272 removed and that it'll re-add them after this truncation.
2275 2273 """
2276 2274 if len(self) == 0:
2277 2275 return
2278 2276
2279 2277 rev, _ = self.getstrippoint(minlink)
2280 2278 if rev == len(self):
2281 2279 return
2282 2280
2283 2281 # first truncate the files on disk
2284 2282 end = self.start(rev)
2285 2283 if not self._inline:
2286 2284 transaction.add(self.datafile, end)
2287 2285 end = rev * self._io.size
2288 2286 else:
2289 2287 end += rev * self._io.size
2290 2288
2291 2289 transaction.add(self.indexfile, end)
2292 2290
2293 2291 # then reset internal state in memory to forget those revisions
2294 2292 self._revisioncache = None
2295 2293 self._chaininfocache = {}
2296 2294 self._chunkclear()
2297 2295 for x in pycompat.xrange(rev, len(self)):
2298 2296 del self.nodemap[self.node(x)]
2299 2297
2300 2298 del self.index[rev:-1]
2301 2299 self._nodepos = None
2302 2300
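The truncation offsets differ by layout: a split revlog cuts the data file at start(rev) and the index at rev * entrysize, while an inline revlog cuts its single file at start(rev) + rev * entrysize, since the rev surviving revisions each keep one interleaved index entry. A hedged restatement of that arithmetic (entry size assumed 64 bytes, as for v1):

    ENTRYSIZE = 64

    def truncationpoints(rev, datastart, inline):
        # datastart mirrors self.start(rev)
        if inline:
            return {'indexfile': datastart + rev * ENTRYSIZE}
        return {'datafile': datastart, 'indexfile': rev * ENTRYSIZE}

    assert truncationpoints(2, 100, False) == {'datafile': 100,
                                               'indexfile': 128}
    assert truncationpoints(2, 100, True) == {'indexfile': 228}
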
2303 2301 def checksize(self):
2304 2302 """Check size of index and data files
2305 2303
2306 2304 return a (dd, di) tuple.
2307 2305 - dd: extra bytes for the "data" file
2308 2306 - di: extra bytes for the "index" file
2309 2307
2310 2308 A healthy revlog will return (0, 0).
2311 2309 """
2312 2310 expected = 0
2313 2311 if len(self):
2314 2312 expected = max(0, self.end(len(self) - 1))
2315 2313
2316 2314 try:
2317 2315 with self._datafp() as f:
2318 2316 f.seek(0, io.SEEK_END)
2319 2317 actual = f.tell()
2320 2318 dd = actual - expected
2321 2319 except IOError as inst:
2322 2320 if inst.errno != errno.ENOENT:
2323 2321 raise
2324 2322 dd = 0
2325 2323
2326 2324 try:
2327 2325 f = self.opener(self.indexfile)
2328 2326 f.seek(0, io.SEEK_END)
2329 2327 actual = f.tell()
2330 2328 f.close()
2331 2329 s = self._io.size
2332 2330 i = max(0, actual // s)
2333 2331 di = actual - (i * s)
2334 2332 if self._inline:
2335 2333 databytes = 0
2336 2334 for r in self:
2337 2335 databytes += max(0, self.length(r))
2338 2336 dd = 0
2339 2337 di = actual - len(self) * s - databytes
2340 2338 except IOError as inst:
2341 2339 if inst.errno != errno.ENOENT:
2342 2340 raise
2343 2341 di = 0
2344 2342
2345 2343 return (dd, di)
2346 2344
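In the split-revlog case the two figures are independent: dd is whatever the data file holds beyond end(tip), and di is whatever trails past a whole number of index entries. A hedged sketch of that arithmetic for the non-inline layout:

    ENTRYSIZE = 64  # assumed v1 index entry size

    def sizecheck(datalen, indexlen, expecteddataend):
        dd = datalen - expecteddataend            # surplus data bytes
        entries = max(0, indexlen // ENTRYSIZE)
        di = indexlen - entries * ENTRYSIZE       # trailing index bytes
        return dd, di

    assert sizecheck(1000, 640, 1000) == (0, 0)    # healthy revlog
    assert sizecheck(1010, 650, 1000) == (10, 10)  # both files dirty
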
2347 2345 def files(self):
2348 2346 res = [self.indexfile]
2349 2347 if not self._inline:
2350 2348 res.append(self.datafile)
2351 2349 return res
2352 2350
2353 2351 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2354 2352 assumehaveparentrevisions=False,
2355 2353 deltamode=repository.CG_DELTAMODE_STD):
2356 2354 if nodesorder not in ('nodes', 'storage', 'linear', None):
2357 2355 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2358 2356 nodesorder)
2359 2357
2360 2358 if nodesorder is None and not self._generaldelta:
2361 2359 nodesorder = 'storage'
2362 2360
2363 2361 if (not self._storedeltachains and
2364 2362 deltamode != repository.CG_DELTAMODE_PREV):
2365 2363 deltamode = repository.CG_DELTAMODE_FULL
2366 2364
2367 2365 return storageutil.emitrevisions(
2368 2366 self, nodes, nodesorder, revlogrevisiondelta,
2369 2367 deltaparentfn=self.deltaparent,
2370 2368 candeltafn=self.candelta,
2371 2369 rawsizefn=self.rawsize,
2372 2370 revdifffn=self.revdiff,
2373 2371 flagsfn=self.flags,
2374 2372 deltamode=deltamode,
2375 2373 revisiondata=revisiondata,
2376 2374 assumehaveparentrevisions=assumehaveparentrevisions)
2377 2375
2378 2376 DELTAREUSEALWAYS = 'always'
2379 2377 DELTAREUSESAMEREVS = 'samerevs'
2380 2378 DELTAREUSENEVER = 'never'
2381 2379
2382 2380 DELTAREUSEFULLADD = 'fulladd'
2383 2381
2384 2382 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2385 2383
2386 2384 def clone(self, tr, destrevlog, addrevisioncb=None,
2387 2385 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2388 2386 """Copy this revlog to another, possibly with format changes.
2389 2387
2390 2388 The destination revlog will contain the same revisions and nodes.
2391 2389 However, it may not be bit-for-bit identical due to e.g. delta encoding
2392 2390 differences.
2393 2391
2394 2392 The ``deltareuse`` argument controls how deltas from the existing revlog
2395 2393 are preserved in the destination revlog. The argument can have the
2396 2394 following values:
2397 2395
2398 2396 DELTAREUSEALWAYS
2399 2397 Deltas will always be reused (if possible), even if the destination
2400 2398 revlog would not select the same revisions for the delta. This is the
2401 2399 fastest mode of operation.
2402 2400 DELTAREUSESAMEREVS
2403 2401 Deltas will be reused if the destination revlog would pick the same
2404 2402 revisions for the delta. This mode strikes a balance between speed
2405 2403 and optimization.
2406 2404 DELTAREUSENEVER
2407 2405 Deltas will never be reused. This is the slowest mode of execution.
2408 2406 This mode can be used to recompute deltas (e.g. if the diff/delta
2409 2407 algorithm changes).
2410 2408
2411 2409 Delta computation can be slow, so the choice of delta reuse policy can
2412 2410 significantly affect run time.
2413 2411
2414 2412 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2415 2413 two extremes. Deltas will be reused when appropriate, but recomputed
2416 2414 when a better delta base is available. This means if you
2417 2415 are converting a non-generaldelta revlog to a generaldelta revlog,
2418 2416 deltas will be recomputed if the delta's parent isn't a parent of the
2419 2417 revision.
2420 2418
2421 2419 In addition to the delta policy, the ``forcedeltabothparents``
2422 2420 argument controls whether to force computing deltas against both
2423 2421 parents for merges. If unset, the destination revlog's current setting is used.
2424 2422 """
2425 2423 if deltareuse not in self.DELTAREUSEALL:
2426 2424 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2427 2425
2428 2426 if len(destrevlog):
2429 2427 raise ValueError(_('destination revlog is not empty'))
2430 2428
2431 2429 if getattr(self, 'filteredrevs', None):
2432 2430 raise ValueError(_('source revlog has filtered revisions'))
2433 2431 if getattr(destrevlog, 'filteredrevs', None):
2434 2432 raise ValueError(_('destination revlog has filtered revisions'))
2435 2433
2436 2434 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2437 2435 # if possible.
2438 2436 oldlazydelta = destrevlog._lazydelta
2439 2437 oldlazydeltabase = destrevlog._lazydeltabase
2440 2438 oldamd = destrevlog._deltabothparents
2441 2439
2442 2440 try:
2443 2441 if deltareuse == self.DELTAREUSEALWAYS:
2444 2442 destrevlog._lazydeltabase = True
2445 2443 destrevlog._lazydelta = True
2446 2444 elif deltareuse == self.DELTAREUSESAMEREVS:
2447 2445 destrevlog._lazydeltabase = False
2448 2446 destrevlog._lazydelta = True
2449 2447 elif deltareuse == self.DELTAREUSENEVER:
2450 2448 destrevlog._lazydeltabase = False
2451 2449 destrevlog._lazydelta = False
2452 2450
2453 2451 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2454 2452
2455 2453 deltacomputer = deltautil.deltacomputer(destrevlog)
2456 2454 index = self.index
2457 2455 for rev in self:
2458 2456 entry = index[rev]
2459 2457
2460 2458 # Some classes override linkrev to take filtered revs into
2461 2459 # account. Use raw entry from index.
2462 2460 flags = entry[0] & 0xffff
2463 2461 linkrev = entry[4]
2464 2462 p1 = index[entry[5]][7]
2465 2463 p2 = index[entry[6]][7]
2466 2464 node = entry[7]
2467 2465
2468 2466 # (Possibly) reuse the delta from the revlog if allowed and
2469 2467 # the revlog chunk is a delta.
2470 2468 cachedelta = None
2471 2469 rawtext = None
2472 2470 if (deltareuse != self.DELTAREUSEFULLADD
2473 2471 and destrevlog._lazydelta):
2474 2472 dp = self.deltaparent(rev)
2475 2473 if dp != nullrev:
2476 2474 cachedelta = (dp, bytes(self._chunk(rev)))
2477 2475
2478 2476 if not cachedelta:
2479 2477 rawtext = self.revision(rev, raw=True)
2480 2478
2481 2479
2482 2480 if deltareuse == self.DELTAREUSEFULLADD:
2483 2481 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2484 2482 cachedelta=cachedelta,
2485 2483 node=node, flags=flags,
2486 2484 deltacomputer=deltacomputer)
2487 2485 else:
2488 2486 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2489 2487 checkambig=False)
2490 2488 dfh = None
2491 2489 if not destrevlog._inline:
2492 2490 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2493 2491 try:
2494 2492 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2495 2493 p2, flags, cachedelta, ifh, dfh,
2496 2494 deltacomputer=deltacomputer)
2497 2495 finally:
2498 2496 if dfh:
2499 2497 dfh.close()
2500 2498 ifh.close()
2501 2499
2502 2500 if addrevisioncb:
2503 2501 addrevisioncb(self, rev, node)
2504 2502 finally:
2505 2503 destrevlog._lazydelta = oldlazydelta
2506 2504 destrevlog._lazydeltabase = oldlazydeltabase
2507 2505 destrevlog._deltabothparents = oldamd
2508 2506
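A hedged usage sketch of the policies above; rl (the source), dest (an empty destination revlog) and tr (an open transaction) are assumed to exist and are not constructed here:

    # balanced default: reuse a delta only when the destination would
    # pick the same base revision anyway
    rl.clone(tr, dest, deltareuse=rl.DELTAREUSESAMEREVS)

    # or, against a fresh destination, recompute every delta and force
    # considering both parents for merges:
    # rl.clone(tr, dest, deltareuse=rl.DELTAREUSENEVER,
    #          forcedeltabothparents=True)
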
2509 2507 def censorrevision(self, tr, censornode, tombstone=b''):
2510 2508 if (self.version & 0xFFFF) == REVLOGV0:
2511 2509 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2512 2510 self.version)
2513 2511
2514 2512 censorrev = self.rev(censornode)
2515 2513 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2516 2514
2517 2515 if len(tombstone) > self.rawsize(censorrev):
2518 2516 raise error.Abort(_('censor tombstone must be no longer than '
2519 2517 'censored data'))
2520 2518
2521 2519 # Rewriting the revlog in place is hard. Our strategy for censoring is
2522 2520 # to create a new revlog, copy all revisions to it, then replace the
2523 2521 # revlogs on transaction close.
2524 2522
2525 2523 newindexfile = self.indexfile + b'.tmpcensored'
2526 2524 newdatafile = self.datafile + b'.tmpcensored'
2527 2525
2528 2526 # This is a bit dangerous. We could easily have a mismatch of state.
2529 2527 newrl = revlog(self.opener, newindexfile, newdatafile,
2530 2528 censorable=True)
2531 2529 newrl.version = self.version
2532 2530 newrl._generaldelta = self._generaldelta
2533 2531 newrl._io = self._io
2534 2532
2535 2533 for rev in self.revs():
2536 2534 node = self.node(rev)
2537 2535 p1, p2 = self.parents(node)
2538 2536
2539 2537 if rev == censorrev:
2540 2538 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2541 2539 p1, p2, censornode, REVIDX_ISCENSORED)
2542 2540
2543 2541 if newrl.deltaparent(rev) != nullrev:
2544 2542 raise error.Abort(_('censored revision stored as delta; '
2545 2543 'cannot censor'),
2546 2544 hint=_('censoring of revlogs is not '
2547 2545 'fully implemented; please report '
2548 2546 'this bug'))
2549 2547 continue
2550 2548
2551 2549 if self.iscensored(rev):
2552 2550 if self.deltaparent(rev) != nullrev:
2553 2551 raise error.Abort(_('cannot censor due to censored '
2554 2552 'revision having delta stored'))
2555 2553 rawtext = self._chunk(rev)
2556 2554 else:
2557 2555 rawtext = self.revision(rev, raw=True)
2558 2556
2559 2557 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2560 2558 self.flags(rev))
2561 2559
2562 2560 tr.addbackup(self.indexfile, location='store')
2563 2561 if not self._inline:
2564 2562 tr.addbackup(self.datafile, location='store')
2565 2563
2566 2564 self.opener.rename(newrl.indexfile, self.indexfile)
2567 2565 if not self._inline:
2568 2566 self.opener.rename(newrl.datafile, self.datafile)
2569 2567
2570 2568 self.clearcaches()
2571 2569 self._loadindex()
2572 2570
2573 2571 def verifyintegrity(self, state):
2574 2572 """Verifies the integrity of the revlog.
2575 2573
2576 2574 Yields ``revlogproblem`` instances describing problems that are
2577 2575 found.
2578 2576 """
2579 2577 dd, di = self.checksize()
2580 2578 if dd:
2581 2579 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2582 2580 if di:
2583 2581 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2584 2582
2585 2583 version = self.version & 0xFFFF
2586 2584
2587 2585 # The verifier tells us what version revlog we should be.
2588 2586 if version != state['expectedversion']:
2589 2587 yield revlogproblem(
2590 2588 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2591 2589 (self.indexfile, version, state['expectedversion']))
2592 2590
2593 2591 state['skipread'] = set()
2594 2592
2595 2593 for rev in self:
2596 2594 node = self.node(rev)
2597 2595
2598 2596 # Verify contents. 4 cases to care about:
2599 2597 #
2600 2598 # common: the most common case
2601 2599 # rename: with a rename
2602 2600 # meta: file content starts with b'\1\n', the metadata
2603 2601 # header defined in filelog.py, but without a rename
2604 2602 # ext: content stored externally
2605 2603 #
2606 2604 # More formally, their differences are shown below:
2607 2605 #
2608 2606 # | common | rename | meta | ext
2609 2607 # -------------------------------------------------------
2610 2608 # flags() | 0 | 0 | 0 | not 0
2611 2609 # renamed() | False | True | False | ?
2612 2610 # rawtext[0:2]=='\1\n'| False | True | True | ?
2613 2611 #
2614 2612 # "rawtext" means the raw text stored in revlog data, which
2615 2613 # could be retrieved by "revision(rev, raw=True)". "text"
2616 2614 # mentioned below is "revision(rev, raw=False)".
2617 2615 #
2618 2616 # There are 3 different lengths stored physically:
2619 2617 # 1. L1: rawsize, stored in revlog index
2620 2618 # 2. L2: len(rawtext), stored in revlog data
2621 2619 # 3. L3: len(text), stored in revlog data if flags==0, or
2622 2620 # possibly somewhere else if flags!=0
2623 2621 #
2624 2622 # L1 should be equal to L2. L3 could be different from them.
2625 2623 # "text" may or may not affect commit hash depending on flag
2626 2624 # processors (see revlog.addflagprocessor).
2627 2625 #
2628 2626 # | common | rename | meta | ext
2629 2627 # -------------------------------------------------
2630 2628 # rawsize() | L1 | L1 | L1 | L1
2631 2629 # size() | L1 | L2-LM | L1(*) | L1 (?)
2632 2630 # len(rawtext) | L2 | L2 | L2 | L2
2633 2631 # len(text) | L2 | L2 | L2 | L3
2634 2632 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2635 2633 #
2636 2634 # LM: length of metadata, depending on rawtext
2637 2635 # (*): not ideal, see comment in filelog.size
2638 2636 # (?): could be "- len(meta)" if the resolved content has
2639 2637 # rename metadata
2640 2638 #
2641 2639 # Checks needed to be done:
2642 2640 # 1. length check: L1 == L2, in all cases.
2643 2641 # 2. hash check: depending on flag processor, we may need to
2644 2642 # use either "text" (external), or "rawtext" (in revlog).
2645 2643
2646 2644 try:
2647 2645 skipflags = state.get('skipflags', 0)
2648 2646 if skipflags:
2649 2647 skipflags &= self.flags(rev)
2650 2648
2651 2649 if skipflags:
2652 2650 state['skipread'].add(node)
2653 2651 else:
2654 2652 # Side-effect: read content and verify hash.
2655 2653 self.revision(node)
2656 2654
2657 2655 l1 = self.rawsize(rev)
2658 2656 l2 = len(self.revision(node, raw=True))
2659 2657
2660 2658 if l1 != l2:
2661 2659 yield revlogproblem(
2662 2660 error=_('unpacked size is %d, %d expected') % (l2, l1),
2663 2661 node=node)
2664 2662
2665 2663 except error.CensoredNodeError:
2666 2664 if state['erroroncensored']:
2667 2665 yield revlogproblem(error=_('censored file data'),
2668 2666 node=node)
2669 2667 state['skipread'].add(node)
2670 2668 except Exception as e:
2671 2669 yield revlogproblem(
2672 2670 error=_('unpacking %s: %s') % (short(node),
2673 2671 stringutil.forcebytestr(e)),
2674 2672 node=node)
2675 2673 state['skipread'].add(node)
2676 2674
2677 2675 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2678 2676 revisionscount=False, trackedsize=False,
2679 2677 storedsize=False):
2680 2678 d = {}
2681 2679
2682 2680 if exclusivefiles:
2683 2681 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2684 2682 if not self._inline:
2685 2683 d['exclusivefiles'].append((self.opener, self.datafile))
2686 2684
2687 2685 if sharedfiles:
2688 2686 d['sharedfiles'] = []
2689 2687
2690 2688 if revisionscount:
2691 2689 d['revisionscount'] = len(self)
2692 2690
2693 2691 if trackedsize:
2694 2692 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2695 2693
2696 2694 if storedsize:
2697 2695 d['storedsize'] = sum(self.opener.stat(path).st_size
2698 2696 for path in self.files())
2699 2697
2700 2698 return d
@@ -1,56 +1,55 b''
1 1 # revlogdeltas.py - constant used for revlog logic
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 from .. import (
13 13 repository,
14 util,
15 14 )
16 15
17 16 # revlog header flags
18 17 REVLOGV0 = 0
19 18 REVLOGV1 = 1
20 19 # Dummy value until file format is finalized.
21 20 # Reminder: change the bounds check in revlog.__init__ when this is changed.
22 21 REVLOGV2 = 0xDEAD
23 22 # Shared across v1 and v2.
24 23 FLAG_INLINE_DATA = (1 << 16)
25 24 # Only used by v1, implied by v2.
26 25 FLAG_GENERALDELTA = (1 << 17)
27 26 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
28 27 REVLOG_DEFAULT_FORMAT = REVLOGV1
29 28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 29 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
31 30 REVLOGV2_FLAGS = FLAG_INLINE_DATA
32 31
33 32 # revlog index flags
34 33
35 34 # For historical reasons, revlog's internal flags were exposed via the
36 35 # wire protocol and are even exposed in parts of the storage APIs.
37 36
38 37 # revision has censor metadata, must be verified
39 38 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
40 39 # revision hash does not match data (narrowhg)
41 40 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
42 41 # revision data is stored externally
43 42 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
44 43 REVIDX_DEFAULT_FLAGS = 0
45 44 # stable order in which flags need to be processed and their processors applied
46 45 REVIDX_FLAGS_ORDER = [
47 46 REVIDX_ISCENSORED,
48 47 REVIDX_ELLIPSIS,
49 48 REVIDX_EXTSTORED,
50 49 ]
51 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
50
52 51 # bitmask for flags that could cause rawdata content change
53 52 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
54 53
55 54 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
56 55
@@ -1,35 +1,39 b''
1 1 # flagutils.py - code to deal with revlog flags and their processors
2 2 #
3 3 # Copyright 2016 Remi Chaintron <remi@fb.com>
4 4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 from .constants import (
12 12 REVIDX_DEFAULT_FLAGS,
13 13 REVIDX_ELLIPSIS,
14 14 REVIDX_EXTSTORED,
15 15 REVIDX_FLAGS_ORDER,
16 16 REVIDX_ISCENSORED,
17 REVIDX_KNOWN_FLAGS,
18 17 REVIDX_RAWTEXT_CHANGING_FLAGS,
19 18 )
20 19
20 from .. import (
21 util
22 )
23
21 24 # blanked usage of all the names to prevent pyflakes complaints
22 25 # We need these names available in the module for extensions.
23 26 REVIDX_ISCENSORED
24 27 REVIDX_ELLIPSIS
25 28 REVIDX_EXTSTORED
26 29 REVIDX_DEFAULT_FLAGS
27 30 REVIDX_FLAGS_ORDER
28 REVIDX_KNOWN_FLAGS
29 31 REVIDX_RAWTEXT_CHANGING_FLAGS
30 32
33 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
34
31 35 # Store flag processors (cf. 'addflagprocessor()' to register)
32 36 flagprocessors = {
33 37 REVIDX_ISCENSORED: None,
34 38 }
35 39
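util.bitsfrom simply ORs every flag in the list together, so computing REVIDX_KNOWN_FLAGS here keeps it in sync with REVIDX_FLAGS_ORDER even after tests or extensions extend the order. A hedged restatement of the helper's behaviour; the concrete bit positions are assumptions, not guaranteed by this diff:

    def bitsfrom(container):
        # equivalent in spirit to mercurial.util.bitsfrom
        bits = 0
        for bit in container:
            bits |= bit
        return bits

    # e.g. with three flags occupying bits 15, 14 and 13:
    assert bitsfrom([1 << 15, 1 << 14, 1 << 13]) == 0xe000
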
@@ -1,136 +1,139 b''
1 1 # coding=UTF-8
2 2
3 3 from __future__ import absolute_import
4 4
5 5 import base64
6 6 import zlib
7 7
8 8 from mercurial import (
9 9 changegroup,
10 10 exchange,
11 11 extensions,
12 12 revlog,
13 13 util,
14 14 )
15 from mercurial.revlogutils import (
16 flagutil,
17 )
15 18
16 19 # Test only: These flags are defined here only in the context of testing the
17 20 # behavior of the flag processor. The canonical way to add flags is to get in
18 21 # touch with the community and make them known in revlog.
19 22 REVIDX_NOOP = (1 << 3)
20 23 REVIDX_BASE64 = (1 << 2)
21 24 REVIDX_GZIP = (1 << 1)
22 25 REVIDX_FAIL = 1
23 26
24 27 def validatehash(self, text):
25 28 return True
26 29
27 30 def bypass(self, text):
28 31 return False
29 32
30 33 def noopdonothing(self, text):
31 34 return (text, True)
32 35
33 36 def b64encode(self, text):
34 37 return (base64.b64encode(text), False)
35 38
36 39 def b64decode(self, text):
37 40 return (base64.b64decode(text), True)
38 41
39 42 def gzipcompress(self, text):
40 43 return (zlib.compress(text), False)
41 44
42 45 def gzipdecompress(self, text):
43 46 return (zlib.decompress(text), True)
44 47
45 48 def supportedoutgoingversions(orig, repo):
46 49 versions = orig(repo)
47 50 versions.discard(b'01')
48 51 versions.discard(b'02')
49 52 versions.add(b'03')
50 53 return versions
51 54
52 55 def allsupportedversions(orig, ui):
53 56 versions = orig(ui)
54 57 versions.add(b'03')
55 58 return versions
56 59
57 60 def makewrappedfile(obj):
58 61 class wrappedfile(obj.__class__):
59 62 def addrevision(self, text, transaction, link, p1, p2,
60 63 cachedelta=None, node=None,
61 flags=revlog.REVIDX_DEFAULT_FLAGS):
64 flags=flagutil.REVIDX_DEFAULT_FLAGS):
62 65 if b'[NOOP]' in text:
63 66 flags |= REVIDX_NOOP
64 67
65 68 if b'[BASE64]' in text:
66 69 flags |= REVIDX_BASE64
67 70
68 71 if b'[GZIP]' in text:
69 72 flags |= REVIDX_GZIP
70 73
71 74 # This addrevision wrapper is meant to add a flag we will not have
72 75 # transforms registered for, ensuring we handle this error case.
73 76 if b'[FAIL]' in text:
74 77 flags |= REVIDX_FAIL
75 78
76 79 return super(wrappedfile, self).addrevision(text, transaction, link,
77 80 p1, p2,
78 81 cachedelta=cachedelta,
79 82 node=node,
80 83 flags=flags)
81 84
82 85 obj.__class__ = wrappedfile
83 86
84 87 def reposetup(ui, repo):
85 88 class wrappingflagprocessorrepo(repo.__class__):
86 89 def file(self, f):
87 90 orig = super(wrappingflagprocessorrepo, self).file(f)
88 91 makewrappedfile(orig)
89 92 return orig
90 93
91 94 repo.__class__ = wrappingflagprocessorrepo
92 95
93 96 def extsetup(ui):
94 97 # Enable changegroup3 for flags to be sent over the wire
95 98 wrapfunction = extensions.wrapfunction
96 99 wrapfunction(changegroup,
97 100 'supportedoutgoingversions',
98 101 supportedoutgoingversions)
99 102 wrapfunction(changegroup,
100 103 'allsupportedversions',
101 104 allsupportedversions)
102 105
103 106 # Teach revlog about our test flags
104 107 flags = [REVIDX_NOOP, REVIDX_BASE64, REVIDX_GZIP, REVIDX_FAIL]
105 revlog.REVIDX_KNOWN_FLAGS |= util.bitsfrom(flags)
108 flagutil.REVIDX_KNOWN_FLAGS |= util.bitsfrom(flags)
106 109 revlog.REVIDX_FLAGS_ORDER.extend(flags)
107 110
108 111 # Teach exchange to use changegroup 3
109 112 for k in exchange._bundlespeccontentopts.keys():
110 113 exchange._bundlespeccontentopts[k][b"cg.version"] = b"03"
111 114
112 115 # Register flag processors for each extension
113 116 revlog.addflagprocessor(
114 117 REVIDX_NOOP,
115 118 (
116 119 noopdonothing,
117 120 noopdonothing,
118 121 validatehash,
119 122 )
120 123 )
121 124 revlog.addflagprocessor(
122 125 REVIDX_BASE64,
123 126 (
124 127 b64decode,
125 128 b64encode,
126 129 bypass,
127 130 ),
128 131 )
129 132 revlog.addflagprocessor(
130 133 REVIDX_GZIP,
131 134 (
132 135 gzipdecompress,
133 136 gzipcompress,
134 137 bypass
135 138 )
136 139 )
@@ -1,708 +1,711 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 nullrev,
23 23 )
24 24 from mercurial.thirdparty import (
25 25 attr,
26 26 )
27 27 from mercurial import (
28 28 ancestor,
29 29 bundlerepo,
30 30 error,
31 31 extensions,
32 32 localrepo,
33 33 mdiff,
34 34 pycompat,
35 35 repository,
36 36 revlog,
37 37 store,
38 38 verify,
39 39 )
40 40 from mercurial.utils import (
41 41 cborutil,
42 42 interfaceutil,
43 43 storageutil,
44 44 )
45 from mercurial.revlogutils import (
46 flagutil,
47 )
45 48
46 49 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
47 50 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
48 51 # be specifying the version(s) of Mercurial they are tested with, or
49 52 # leave the attribute unspecified.
50 53 testedwith = 'ships-with-hg-core'
51 54
52 55 REQUIREMENT = 'testonly-simplestore'
53 56
54 57 def validatenode(node):
55 58 if isinstance(node, int):
56 59 raise ValueError('expected node; got int')
57 60
58 61 if len(node) != 20:
59 62 raise ValueError('expected 20 byte node')
60 63
61 64 def validaterev(rev):
62 65 if not isinstance(rev, int):
63 66 raise ValueError('expected int')
64 67
65 68 class simplestoreerror(error.StorageError):
66 69 pass
67 70
68 71 @interfaceutil.implementer(repository.irevisiondelta)
69 72 @attr.s(slots=True)
70 73 class simplestorerevisiondelta(object):
71 74 node = attr.ib()
72 75 p1node = attr.ib()
73 76 p2node = attr.ib()
74 77 basenode = attr.ib()
75 78 flags = attr.ib()
76 79 baserevisionsize = attr.ib()
77 80 revision = attr.ib()
78 81 delta = attr.ib()
79 82 linknode = attr.ib(default=None)
80 83
81 84 @interfaceutil.implementer(repository.iverifyproblem)
82 85 @attr.s(frozen=True)
83 86 class simplefilestoreproblem(object):
84 87 warning = attr.ib(default=None)
85 88 error = attr.ib(default=None)
86 89 node = attr.ib(default=None)
87 90
88 91 @interfaceutil.implementer(repository.ifilestorage)
89 92 class filestorage(object):
90 93 """Implements storage for a tracked path.
91 94
92 95 Data is stored in the VFS in a directory corresponding to the tracked
93 96 path.
94 97
95 98 Index data is stored in an ``index`` file using CBOR.
96 99
97 100 Fulltext data is stored in files named after the node.
98 101 """
99 102
100 103 def __init__(self, svfs, path):
101 104 self._svfs = svfs
102 105 self._path = path
103 106
104 107 self._storepath = b'/'.join([b'data', path])
105 108 self._indexpath = b'/'.join([self._storepath, b'index'])
106 109
107 110 indexdata = self._svfs.tryread(self._indexpath)
108 111 if indexdata:
109 112 indexdata = cborutil.decodeall(indexdata)
110 113
111 114 self._indexdata = indexdata or []
112 115 self._indexbynode = {}
113 116 self._indexbyrev = {}
114 117 self._index = []
115 118 self._refreshindex()
116 119
117 120 def _refreshindex(self):
118 121 self._indexbynode.clear()
119 122 self._indexbyrev.clear()
120 123 self._index = []
121 124
122 125 for i, entry in enumerate(self._indexdata):
123 126 self._indexbynode[entry[b'node']] = entry
124 127 self._indexbyrev[i] = entry
125 128
126 129 self._indexbynode[nullid] = {
127 130 b'node': nullid,
128 131 b'p1': nullid,
129 132 b'p2': nullid,
130 133 b'linkrev': nullrev,
131 134 b'flags': 0,
132 135 }
133 136
134 137 self._indexbyrev[nullrev] = {
135 138 b'node': nullid,
136 139 b'p1': nullid,
137 140 b'p2': nullid,
138 141 b'linkrev': nullrev,
139 142 b'flags': 0,
140 143 }
141 144
142 145 for i, entry in enumerate(self._indexdata):
143 146 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
144 147
145 148 # start, length, rawsize, chainbase, linkrev, p1, p2, node
146 149 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
147 150 entry[b'node']))
148 151
149 152 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
150 153
151 154 def __len__(self):
152 155 return len(self._indexdata)
153 156
154 157 def __iter__(self):
155 158 return iter(range(len(self)))
156 159
157 160 def revs(self, start=0, stop=None):
158 161 step = 1
159 162 if stop is not None:
160 163 if start > stop:
161 164 step = -1
162 165
163 166 stop += step
164 167 else:
165 168 stop = len(self)
166 169
167 170 return range(start, stop, step)
168 171
169 172 def parents(self, node):
170 173 validatenode(node)
171 174
172 175 if node not in self._indexbynode:
173 176 raise KeyError('unknown node')
174 177
175 178 entry = self._indexbynode[node]
176 179
177 180 return entry[b'p1'], entry[b'p2']
178 181
179 182 def parentrevs(self, rev):
180 183 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
181 184 return self.rev(p1), self.rev(p2)
182 185
183 186 def rev(self, node):
184 187 validatenode(node)
185 188
186 189 try:
187 190 self._indexbynode[node]
188 191 except KeyError:
189 192 raise error.LookupError(node, self._indexpath, _('no node'))
190 193
191 194 for rev, entry in self._indexbyrev.items():
192 195 if entry[b'node'] == node:
193 196 return rev
194 197
195 198 raise error.ProgrammingError('this should not occur')
196 199
197 200 def node(self, rev):
198 201 validaterev(rev)
199 202
200 203 return self._indexbyrev[rev][b'node']
201 204
202 205 def hasnode(self, node):
203 206 validatenode(node)
204 207 return node in self._indexbynode
205 208
206 209 def censorrevision(self, tr, censornode, tombstone=b''):
207 210 raise NotImplementedError('TODO')
208 211
209 212 def lookup(self, node):
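        # Accepted inputs, in the order checked below: an integer rev, a
        # 20-byte binary node, a decimal rev string (negative values
        # count back from the end), or a 40-character hex node string.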
210 213 if isinstance(node, int):
211 214 return self.node(node)
212 215
213 216 if len(node) == 20:
214 217 self.rev(node)
215 218 return node
216 219
217 220 try:
218 221 rev = int(node)
219 222 if '%d' % rev != node:
220 223 raise ValueError
221 224
222 225 if rev < 0:
223 226 rev = len(self) + rev
224 227 if rev < 0 or rev >= len(self):
225 228 raise ValueError
226 229
227 230 return self.node(rev)
228 231 except (ValueError, OverflowError):
229 232 pass
230 233
231 234 if len(node) == 40:
232 235 try:
233 236 rawnode = bin(node)
234 237 self.rev(rawnode)
235 238 return rawnode
236 239 except TypeError:
237 240 pass
238 241
239 242 raise error.LookupError(node, self._path, _('invalid lookup input'))
240 243
241 244 def linkrev(self, rev):
242 245 validaterev(rev)
243 246
244 247 return self._indexbyrev[rev][b'linkrev']
245 248
246 249 def _flags(self, rev):
247 250 validaterev(rev)
248 251
249 252 return self._indexbyrev[rev][b'flags']
250 253
251 254 def _candelta(self, baserev, rev):
252 255 validaterev(baserev)
253 256 validaterev(rev)
254 257
255 258 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
256 259 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
257 260 return False
258 261
259 262 return True
260 263
261 264 def _processflags(self, text, flags, operation, raw=False):
262 265 if flags == 0:
263 266 return text, True
264 267
265 if flags & ~revlog.REVIDX_KNOWN_FLAGS:
268 if flags & ~flagutil.REVIDX_KNOWN_FLAGS:
266 269 raise simplestoreerror(_("incompatible revision flag '%#x'") %
267 (flags & ~revlog.REVIDX_KNOWN_FLAGS))
270 (flags & ~flagutil.REVIDX_KNOWN_FLAGS))
268 271
269 272 validatehash = True
270 273 # Depending on the operation (read or write), the order might be
271 274 # reversed due to non-commutative transforms.
272 275 orderedflags = revlog.REVIDX_FLAGS_ORDER
273 276 if operation == 'write':
274 277 orderedflags = reversed(orderedflags)
275 278
276 279 for flag in orderedflags:
277 280 # If a flagprocessor has been registered for a known flag, apply the
278 281 # related operation transform and update the result tuple.
279 282 if flag & flags:
280 283 vhash = True
281 284
282 285 if flag not in revlog._flagprocessors:
283 286 message = _("missing processor for flag '%#x'") % (flag)
284 287 raise simplestoreerror(message)
285 288
286 289 processor = revlog._flagprocessors[flag]
287 290 if processor is not None:
288 291 readtransform, writetransform, rawtransform = processor
289 292
290 293 if raw:
291 294 vhash = rawtransform(self, text)
292 295 elif operation == 'read':
293 296 text, vhash = readtransform(self, text)
294 297 else: # write operation
295 298 text, vhash = writetransform(self, text)
296 299 validatehash = validatehash and vhash
297 300
298 301 return text, validatehash
299 302
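    # A minimal sketch of the (readtransform, writetransform, rawtransform)
    # triple that _processflags() looks up in revlog._flagprocessors for
    # each set flag. The base64 transforms and the SOME_FLAG registration
    # below are illustrative assumptions, not part of this change:
    #
    #   import base64
    #
    #   def b64read(self, text):
    #       return base64.b64decode(text), True   # decoded text, validate hash
    #
    #   def b64write(self, text):
    #       return base64.b64encode(text), False  # encoded text, skip validation
    #
    #   def b64raw(self, text):
    #       return False  # rawtext hash never matches the processed text
    #
    #   revlog.addflagprocessor(SOME_FLAG, (b64read, b64write, b64raw))
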
300 303 def checkhash(self, text, node, p1=None, p2=None, rev=None):
301 304 if p1 is None and p2 is None:
302 305 p1, p2 = self.parents(node)
303 306 if node != storageutil.hashrevisionsha1(text, p1, p2):
304 307 raise simplestoreerror(_("integrity check failed on %s") %
305 308 self._path)
306 309
307 310 def revision(self, nodeorrev, raw=False):
308 311 if isinstance(nodeorrev, int):
309 312 node = self.node(nodeorrev)
310 313 else:
311 314 node = nodeorrev
312 315 validatenode(node)
313 316
314 317 if node == nullid:
315 318 return b''
316 319
317 320 rev = self.rev(node)
318 321 flags = self._flags(rev)
319 322
320 323 path = b'/'.join([self._storepath, hex(node)])
321 324 rawtext = self._svfs.read(path)
322 325
323 326 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
324 327 if validatehash:
325 328 self.checkhash(text, node, rev=rev)
326 329
327 330 return text
328 331
329 332 def rawdata(self, nodeorrev):
330 333 return self.revision(nodeorrev, raw=True)
331 334
332 335 def read(self, node):
333 336 validatenode(node)
334 337
335 338 revision = self.revision(node)
336 339
337 340 if not revision.startswith(b'\1\n'):
338 341 return revision
339 342
340 343 start = revision.index(b'\1\n', 2)
341 344 return revision[start + 2:]
342 345
343 346 def renamed(self, node):
344 347 validatenode(node)
345 348
346 349 if self.parents(node)[0] != nullid:
347 350 return False
348 351
349 352 fulltext = self.revision(node)
350 353 m = storageutil.parsemeta(fulltext)[0]
351 354
352 355 if m and 'copy' in m:
353 356 return m['copy'], bin(m['copyrev'])
354 357
355 358 return False
356 359
357 360 def cmp(self, node, text):
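        # Returns True if ``text`` differs from the stored revision,
        # mirroring revlog.cmp(): the fast path hashes the (possibly
        # metadata-framed) text against the node before reading content.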
358 361 validatenode(node)
359 362
360 363 t = text
361 364
362 365 if text.startswith(b'\1\n'):
363 366 t = b'\1\n\1\n' + text
364 367
365 368 p1, p2 = self.parents(node)
366 369
367 370 if storageutil.hashrevisionsha1(t, p1, p2) == node:
368 371 return False
369 372
370 373 if self.iscensored(self.rev(node)):
371 374 return text != b''
372 375
373 376 if self.renamed(node):
374 377 t2 = self.read(node)
375 378 return t2 != text
376 379
377 380 return True
378 381
379 382 def size(self, rev):
380 383 validaterev(rev)
381 384
382 385 node = self._indexbyrev[rev][b'node']
383 386
384 387 if self.renamed(node):
385 388 return len(self.read(node))
386 389
387 390 if self.iscensored(rev):
388 391 return 0
389 392
390 393 return len(self.revision(node))
391 394
392 395 def iscensored(self, rev):
393 396 validaterev(rev)
394 397
395 398 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
396 399
397 400 def commonancestorsheads(self, a, b):
398 401 validatenode(a)
399 402 validatenode(b)
400 403
401 404 a = self.rev(a)
402 405 b = self.rev(b)
403 406
404 407 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
405 408 return pycompat.maplist(self.node, ancestors)
406 409
407 410 def descendants(self, revs):
408 411 # This is a copy of revlog.descendants()
409 412 first = min(revs)
410 413 if first == nullrev:
411 414 for i in self:
412 415 yield i
413 416 return
414 417
415 418 seen = set(revs)
416 419 for i in self.revs(start=first + 1):
417 420 for x in self.parentrevs(i):
418 421 if x != nullrev and x in seen:
419 422 seen.add(i)
420 423 yield i
421 424 break
422 425
423 426 # Required by verify.
424 427 def files(self):
425 428 entries = self._svfs.listdir(self._storepath)
426 429
427 430 # Strip out undo.backup.* files created as part of transaction
428 431 # recording.
429 432 entries = [f for f in entries if not f.startswith('undo.backup.')]
430 433
431 434 return [b'/'.join((self._storepath, f)) for f in entries]
432 435
433 436 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
434 437 revisionscount=False, trackedsize=False,
435 438 storedsize=False):
436 439 # TODO do a real implementation of this
437 440 return {
438 441 'exclusivefiles': [],
439 442 'sharedfiles': [],
440 443 'revisionscount': len(self),
441 444 'trackedsize': 0,
442 445 'storedsize': None,
443 446 }
444 447
445 448 def verifyintegrity(self, state):
446 449 state['skipread'] = set()
447 450 for rev in self:
448 451 node = self.node(rev)
449 452 try:
450 453 self.revision(node)
451 454 except Exception as e:
452 455 yield simplefilestoreproblem(
453 456 error='unpacking %s: %s' % (node, e),
454 457 node=node)
455 458 state['skipread'].add(node)
456 459
457 460 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
458 461 assumehaveparentrevisions=False,
459 462 deltamode=repository.CG_DELTAMODE_STD):
460 463 # TODO this will probably break on some ordering options.
461 464 nodes = [n for n in nodes if n != nullid]
462 465 if not nodes:
463 466 return
464 467 for delta in storageutil.emitrevisions(
465 468 self, nodes, nodesorder, simplestorerevisiondelta,
466 469 revisiondata=revisiondata,
467 470 assumehaveparentrevisions=assumehaveparentrevisions,
468 471 deltamode=deltamode):
469 472 yield delta
470 473
471 474 def add(self, text, meta, transaction, linkrev, p1, p2):
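        # Texts carrying metadata, or ones that themselves begin with the
        # b'\1\n' escape marker, are framed via packmeta() so that read()
        # can strip the metadata block back off.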
472 475 if meta or text.startswith(b'\1\n'):
473 476 text = storageutil.packmeta(meta, text)
474 477
475 478 return self.addrevision(text, transaction, linkrev, p1, p2)
476 479
477 480 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
478 481 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
479 482 validatenode(p1)
480 483 validatenode(p2)
481 484
482 485 if flags:
483 486 node = node or storageutil.hashrevisionsha1(text, p1, p2)
484 487
485 488 rawtext, validatehash = self._processflags(text, flags, 'write')
486 489
487 490 node = node or storageutil.hashrevisionsha1(text, p1, p2)
488 491
489 492 if node in self._indexbynode:
490 493 return node
491 494
492 495 if validatehash:
493 496 self.checkhash(rawtext, node, p1=p1, p2=p2)
494 497
495 498 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
496 499 flags)
497 500
498 501 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
499 502 transaction.addbackup(self._indexpath)
500 503
501 504 path = b'/'.join([self._storepath, hex(node)])
502 505
503 506 self._svfs.write(path, rawtext)
504 507
505 508 self._indexdata.append({
506 509 b'node': node,
507 510 b'p1': p1,
508 511 b'p2': p2,
509 512 b'linkrev': link,
510 513 b'flags': flags,
511 514 })
512 515
513 516 self._reflectindexupdate()
514 517
515 518 return node
516 519
517 520 def _reflectindexupdate(self):
518 521 self._refreshindex()
519 522 self._svfs.write(self._indexpath,
520 523 ''.join(cborutil.streamencode(self._indexdata)))
521 524
522 525 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
523 526 maybemissingparents=False):
524 527 if maybemissingparents:
525 528 raise error.Abort(_('simple store does not support missing parents '
526 529 'write mode'))
527 530
528 531 nodes = []
529 532
530 533 transaction.addbackup(self._indexpath)
531 534
532 535 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
533 536 linkrev = linkmapper(linknode)
534 537 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
535 538
536 539 nodes.append(node)
537 540
538 541 if node in self._indexbynode:
539 542 continue
540 543
541 544 # Need to resolve the fulltext from the delta base.
542 545 if deltabase == nullid:
543 546 text = mdiff.patch(b'', delta)
544 547 else:
545 548 text = mdiff.patch(self.revision(deltabase), delta)
546 549
547 550 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
548 551 flags)
549 552
550 553 if addrevisioncb:
551 554 addrevisioncb(self, node)
552 555 return nodes
553 556
554 557 def _headrevs(self):
555 558 # Assume all revisions are heads by default.
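        # (e.g. with a linear history 0 <- 1 <- 2, revs 0 and 1 each
        # appear as a parent and are cleared below, leaving only rev 2)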
556 559 revishead = {rev: True for rev in self._indexbyrev}
557 560
558 561 for rev, entry in self._indexbyrev.items():
559 562 # Unset head flag for all seen parents.
560 563 revishead[self.rev(entry[b'p1'])] = False
561 564 revishead[self.rev(entry[b'p2'])] = False
562 565
563 566 return [rev for rev, ishead in sorted(revishead.items())
564 567 if ishead]
565 568
566 569 def heads(self, start=None, stop=None):
567 570 # This is copied from revlog.py.
568 571 if start is None and stop is None:
569 572 if not len(self):
570 573 return [nullid]
571 574 return [self.node(r) for r in self._headrevs()]
572 575
573 576 if start is None:
574 577 start = nullid
575 578 if stop is None:
576 579 stop = []
577 580 stoprevs = set([self.rev(n) for n in stop])
578 581 startrev = self.rev(start)
579 582 reachable = {startrev}
580 583 heads = {startrev}
581 584
582 585 parentrevs = self.parentrevs
583 586 for r in self.revs(start=startrev + 1):
584 587 for p in parentrevs(r):
585 588 if p in reachable:
586 589 if r not in stoprevs:
587 590 reachable.add(r)
588 591 heads.add(r)
589 592 if p in heads and p not in stoprevs:
590 593 heads.remove(p)
591 594
592 595 return [self.node(r) for r in heads]
593 596
594 597 def children(self, node):
595 598 validatenode(node)
596 599
597 600 # This is a copy of revlog.children().
598 601 c = []
599 602 p = self.rev(node)
600 603 for r in self.revs(start=p + 1):
601 604 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
602 605 if prevs:
603 606 for pr in prevs:
604 607 if pr == p:
605 608 c.append(self.node(r))
606 609 elif p == nullrev:
607 610 c.append(self.node(r))
608 611 return c
609 612
610 613 def getstrippoint(self, minlink):
611 614 return storageutil.resolvestripinfo(
612 615 minlink, len(self) - 1, self._headrevs(), self.linkrev,
613 616 self.parentrevs)
614 617
615 618 def strip(self, minlink, transaction):
616 619 if not len(self):
617 620 return
618 621
619 622 rev, _ignored = self.getstrippoint(minlink)
620 623 if rev == len(self):
621 624 return
622 625
623 626 # Purge index data starting at the requested revision.
624 627 self._indexdata[rev:] = []
625 628 self._reflectindexupdate()
626 629
627 630 def issimplestorefile(f, kind, st):
628 631 if kind != stat.S_IFREG:
629 632 return False
630 633
631 634 if store.isrevlog(f, kind, st):
632 635 return False
633 636
634 637 # Ignore transaction undo files.
635 638 if f.startswith('undo.'):
636 639 return False
637 640
638 641 # Otherwise assume it belongs to the simple store.
639 642 return True
640 643
641 644 class simplestore(store.encodedstore):
642 645 def datafiles(self):
643 646 for x in super(simplestore, self).datafiles():
644 647 yield x
645 648
646 649 # Supplement with non-revlog files.
647 650 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
648 651
649 652 for unencoded, encoded, size in extrafiles:
650 653 try:
651 654 unencoded = store.decodefilename(unencoded)
652 655 except KeyError:
653 656 unencoded = None
654 657
655 658 yield unencoded, encoded, size
656 659
657 660 def reposetup(ui, repo):
658 661 if not repo.local():
659 662 return
660 663
661 664 if isinstance(repo, bundlerepo.bundlerepository):
662 665 raise error.Abort(_('cannot use simple store with bundlerepo'))
663 666
664 667 class simplestorerepo(repo.__class__):
665 668 def file(self, f):
666 669 return filestorage(self.svfs, f)
667 670
668 671 repo.__class__ = simplestorerepo
669 672
670 673 def featuresetup(ui, supported):
671 674 supported.add(REQUIREMENT)
672 675
673 676 def newreporequirements(orig, ui, createopts):
674 677 """Modifies default requirements for new repos to use the simple store."""
675 678 requirements = orig(ui, createopts)
676 679
677 680 # These requirements are only used to affect creation of the store
678 681 # object. We have our own store. So we can remove them.
679 682 # TODO do this once we feel like taking the test hit.
680 683 #if 'fncache' in requirements:
681 684 # requirements.remove('fncache')
682 685 #if 'dotencode' in requirements:
683 686 # requirements.remove('dotencode')
684 687
685 688 requirements.add(REQUIREMENT)
686 689
687 690 return requirements
688 691
689 692 def makestore(orig, requirements, path, vfstype):
690 693 if REQUIREMENT not in requirements:
691 694 return orig(requirements, path, vfstype)
692 695
693 696 return simplestore(path, vfstype)
694 697
695 698 def verifierinit(orig, self, *args, **kwargs):
696 699 orig(self, *args, **kwargs)
697 700
698 701 # We don't care that files in the store don't align with what is
699 702 # advertised, so suppress these warnings.
700 703 self.warnorphanstorefiles = False
701 704
702 705 def extsetup(ui):
703 706 localrepo.featuresetupfuncs.add(featuresetup)
704 707
705 708 extensions.wrapfunction(localrepo, 'newreporequirements',
706 709 newreporequirements)
707 710 extensions.wrapfunction(localrepo, 'makestore', makestore)
708 711 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
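
# Illustrative usage (the hgrc path and extension name here are
# assumptions, not part of this change): enabling the extension along
# these lines
#
#   [extensions]
#   simplestore = /path/to/simplestorerepo.py
#
# makes new repositories carry REQUIREMENT, use the simplestore store
# object, and serve file data through filestorage via the wrapped
# newreporequirements/makestore functions above.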