revlog: clear revision cache on hash verification failure...
Gregory Szorc - r40090:801ccd8e default
@@ -1,2548 +1,2557 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # blanket usage of all the names to silence pyflakes warnings.
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100
101 101 # Aliased for performance.
102 102 _zlibdecompress = zlib.decompress
103 103
104 104 # max size of revlog with inline data
105 105 _maxinline = 131072
106 106 _chunksize = 1048576
107 107
108 108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 109 _flagprocessors = {
110 110 REVIDX_ISCENSORED: None,
111 111 }
112 112
113 113 # Flag processors for REVIDX_ELLIPSIS.
114 114 def ellipsisreadprocessor(rl, text):
115 115 return text, False
116 116
117 117 def ellipsiswriteprocessor(rl, text):
118 118 return text, False
119 119
120 120 def ellipsisrawprocessor(rl, text):
121 121 return False
122 122
123 123 ellipsisprocessor = (
124 124 ellipsisreadprocessor,
125 125 ellipsiswriteprocessor,
126 126 ellipsisrawprocessor,
127 127 )
128 128
129 129 def addflagprocessor(flag, processor):
130 130 """Register a flag processor on a revision data flag.
131 131
132 132 Invariant:
133 133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 135 - Only one flag processor can be registered on a specific flag.
136 136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 137 following signatures:
138 138 - (read) f(self, rawtext) -> text, bool
139 139 - (write) f(self, text) -> rawtext, bool
140 140 - (raw) f(self, rawtext) -> bool
141 141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 142 directly visible to the user.
143 143 The boolean returned by these transforms is used to determine whether
144 144 the returned text can be used for hash integrity checking. For example,
145 145 if "write" returns False, then "text" is used to generate the hash. If
146 146 "write" returns True, it means the "rawtext" returned by "write"
147 147 should be used to generate the hash. Usually, "write" and "read" return
148 148 different booleans, and "raw" returns the same boolean as "write".
149 149
150 150 Note: The 'raw' transform is used for changegroup generation and in some
151 151 debug commands. In this case the transform only indicates whether the
152 152 contents can be used for hash integrity checks.
153 153 """
154 154 if not flag & REVIDX_KNOWN_FLAGS:
155 155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
156 156 raise error.ProgrammingError(msg)
157 157 if flag not in REVIDX_FLAGS_ORDER:
158 158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
159 159 raise error.ProgrammingError(msg)
160 160 if flag in _flagprocessors:
161 161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
162 162 raise error.Abort(msg)
163 163 _flagprocessors[flag] = processor
164 164
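# Illustrative sketch (not part of the upstream file): how an extension
# might register a flag processor following the 3-tuple contract
# documented in addflagprocessor(). The base64 transform and the reuse
# of REVIDX_EXTSTORED are assumptions for illustration only; real
# processors such as LFS do considerably more work.
import base64

def _sketchb64read(rl, rawtext):
    # stored form -> user-visible form; False: hash is checked on "text"
    return base64.b64decode(rawtext), False

def _sketchb64write(rl, text):
    # user-visible form -> stored form; False: hash is generated on "text"
    return base64.b64encode(text), False

def _sketchb64raw(rl, rawtext):
    # rawtext alone cannot be used for hash verification here
    return False

# an extension would then call:
# addflagprocessor(REVIDX_EXTSTORED,
#                  (_sketchb64read, _sketchb64write, _sketchb64raw))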
165 165 def getoffset(q):
166 166 return int(q >> 16)
167 167
168 168 def gettype(q):
169 169 return int(q & 0xFFFF)
170 170
171 171 def offset_type(offset, type):
172 172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
173 173 raise ValueError('unknown revlog index flags')
174 174 return int(int(offset) << 16 | type)
175 175
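# Illustrative demo (not in the upstream file): the first field of an
# index entry packs the byte offset and the 16-bit flag word into one
# integer; the two helpers above invert offset_type() exactly.
def _offsettypedemo():
    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED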
176 176 @attr.s(slots=True, frozen=True)
177 177 class _revisioninfo(object):
178 178 """Information about a revision that allows building its fulltext
179 179 node: expected hash of the revision
180 180 p1, p2: parent revs of the revision
181 181 btext: built text cache consisting of a one-element list
182 182 cachedelta: (baserev, uncompressed_delta) or None
183 183 flags: flags associated with the revision storage
184 184
185 185 One of btext[0] or cachedelta must be set.
186 186 """
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208 @interfaceutil.implementer(repository.iverifyproblem)
209 209 @attr.s(frozen=True)
210 210 class revlogproblem(object):
211 211 warning = attr.ib(default=None)
212 212 error = attr.ib(default=None)
213 213 node = attr.ib(default=None)
214 214
215 215 # index v0:
216 216 # 4 bytes: offset
217 217 # 4 bytes: compressed length
218 218 # 4 bytes: base rev
219 219 # 4 bytes: link rev
220 220 # 20 bytes: parent 1 nodeid
221 221 # 20 bytes: parent 2 nodeid
222 222 # 20 bytes: nodeid
223 223 indexformatv0 = struct.Struct(">4l20s20s20s")
224 224 indexformatv0_pack = indexformatv0.pack
225 225 indexformatv0_unpack = indexformatv0.unpack
226 226
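# Illustrative demo (not in the upstream file): pack and unpack one
# 76-byte v0 index record with the struct format above; dummy values.
def _indexv0demo():
    rec = indexformatv0_pack(0, 11, 0, 0,
                             b'\x11' * 20, b'\x22' * 20, b'\x33' * 20)
    assert len(rec) == indexformatv0.size  # 4 * 4 + 3 * 20 == 76
    fields = indexformatv0_unpack(rec)
    assert fields[1] == 11              # compressed length
    assert fields[6] == b'\x33' * 20    # nodeid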
227 227 class revlogoldindex(list):
228 228 def __getitem__(self, i):
229 229 if i == -1:
230 230 return (0, 0, 0, -1, -1, -1, -1, nullid)
231 231 return list.__getitem__(self, i)
232 232
233 233 class revlogoldio(object):
234 234 def __init__(self):
235 235 self.size = indexformatv0.size
236 236
237 237 def parseindex(self, data, inline):
238 238 s = self.size
239 239 index = []
240 240 nodemap = {nullid: nullrev}
241 241 n = off = 0
242 242 l = len(data)
243 243 while off + s <= l:
244 244 cur = data[off:off + s]
245 245 off += s
246 246 e = indexformatv0_unpack(cur)
247 247 # transform to revlogv1 format
248 248 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
249 249 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
250 250 index.append(e2)
251 251 nodemap[e[6]] = n
252 252 n += 1
253 253
254 254 return revlogoldindex(index), nodemap, None
255 255
256 256 def packentry(self, entry, node, version, rev):
257 257 if gettype(entry[0]):
258 258 raise error.RevlogError(_('index entry flags need revlog '
259 259 'version 1'))
260 260 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
261 261 node(entry[5]), node(entry[6]), entry[7])
262 262 return indexformatv0_pack(*e2)
263 263
264 264 # index ng:
265 265 # 6 bytes: offset
266 266 # 2 bytes: flags
267 267 # 4 bytes: compressed length
268 268 # 4 bytes: uncompressed length
269 269 # 4 bytes: base rev
270 270 # 4 bytes: link rev
271 271 # 4 bytes: parent 1 rev
272 272 # 4 bytes: parent 2 rev
273 273 # 32 bytes: nodeid
274 274 indexformatng = struct.Struct(">Qiiiiii20s12x")
275 275 indexformatng_pack = indexformatng.pack
276 276 versionformat = struct.Struct(">I")
277 277 versionformat_pack = versionformat.pack
278 278 versionformat_unpack = versionformat.unpack
279 279
280 280 # maximum uncompressed length representable in indexformatng (2 GiB,
281 281 # stored as a 4-byte signed integer)
282 282 _maxentrysize = 0x7fffffff
283 283
284 284 class revlogio(object):
285 285 def __init__(self):
286 286 self.size = indexformatng.size
287 287
288 288 def parseindex(self, data, inline):
289 289 # call the C implementation to parse the index data
290 290 index, cache = parsers.parse_index2(data, inline)
291 291 return index, getattr(index, 'nodemap', None), cache
292 292
293 293 def packentry(self, entry, node, version, rev):
294 294 p = indexformatng_pack(*entry)
295 295 if rev == 0:
296 296 p = versionformat_pack(version) + p[4:]
297 297 return p
298 298
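# Illustrative demo (not in the upstream file): for rev 0, packentry()
# above splices the version/flags word over the first four bytes of the
# 64-byte record, so the header costs no extra space (the offset of
# rev 0 is always 0, so nothing is lost). Dummy entry values.
def _headeroverlaydemo():
    io = revlogio()
    entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, b'\x00' * 20)
    rec = io.packentry(entry, None, REVLOGV1 | FLAG_INLINE_DATA, 0)
    assert len(rec) == indexformatng.size  # still 64 bytes
    assert versionformat_unpack(rec[:4])[0] == REVLOGV1 | FLAG_INLINE_DATA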
299 299 class revlog(object):
300 300 """
301 301 the underlying revision storage object
302 302
303 303 A revlog consists of two parts, an index and the revision data.
304 304
305 305 The index is a file with a fixed record size containing
306 306 information on each revision, including its nodeid (hash), the
307 307 nodeids of its parents, the position and offset of its data within
308 308 the data file, and the revision it's based on. Finally, each entry
309 309 contains a linkrev entry that can serve as a pointer to external
310 310 data.
311 311
312 312 The revision data itself is a linear collection of data chunks.
313 313 Each chunk represents a revision and is usually represented as a
314 314 delta against the previous chunk. To bound lookup time, runs of
315 315 deltas are limited to about 2 times the length of the original
316 316 version data. This makes retrieval of a version proportional to
317 317 its size, or O(1) relative to the number of revisions.
318 318
319 319 Both pieces of the revlog are written to in an append-only
320 320 fashion, which means we never need to rewrite a file to insert or
321 321 remove data, and can use some simple techniques to avoid the need
322 322 for locking while reading.
323 323
324 324 If checkambig, indexfile is opened with checkambig=True at
325 325 writing, to avoid file stat ambiguity.
326 326
327 327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
328 328 index will be mmapped rather than read if it is larger than the
329 329 configured threshold.
330 330
331 331 If censorable is True, the revlog can have censored revisions.
332 332 """
333 333 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
334 334 mmaplargeindex=False, censorable=False):
335 335 """
336 336 create a revlog object
337 337
338 338 opener is a function that abstracts the file opening operation
339 339 and can be used to implement COW semantics or the like.
340 340 """
341 341 self.indexfile = indexfile
342 342 self.datafile = datafile or (indexfile[:-2] + ".d")
343 343 self.opener = opener
344 344 # When True, indexfile is opened with checkambig=True at writing, to
345 345 # avoid file stat ambiguity.
346 346 self._checkambig = checkambig
347 347 self._censorable = censorable
348 348 # 3-tuple of (node, rev, text) for a raw revision.
349 349 self._revisioncache = None
350 350 # Maps rev to chain base rev.
351 351 self._chainbasecache = util.lrucachedict(100)
352 352 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
353 353 self._chunkcache = (0, '')
354 354 # How much data to read and cache into the raw revlog data cache.
355 355 self._chunkcachesize = 65536
356 356 self._maxchainlen = None
357 357 self._deltabothparents = True
358 358 self.index = []
359 359 # Mapping of partial identifiers to full nodes.
360 360 self._pcache = {}
361 361 # Mapping of revision integer to full node.
362 362 self._nodecache = {nullid: nullrev}
363 363 self._nodepos = None
364 364 self._compengine = 'zlib'
365 365 self._maxdeltachainspan = -1
366 366 self._withsparseread = False
367 367 self._sparserevlog = False
368 368 self._srdensitythreshold = 0.50
369 369 self._srmingapsize = 262144
370 370
371 371 # Make copy of flag processors so each revlog instance can support
372 372 # custom flags.
373 373 self._flagprocessors = dict(_flagprocessors)
374 374
375 375 mmapindexthreshold = None
376 376 v = REVLOG_DEFAULT_VERSION
377 377 opts = getattr(opener, 'options', None)
378 378 if opts is not None:
379 379 if 'revlogv2' in opts:
380 380 # version 2 revlogs always use generaldelta.
381 381 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
382 382 elif 'revlogv1' in opts:
383 383 if 'generaldelta' in opts:
384 384 v |= FLAG_GENERALDELTA
385 385 else:
386 386 v = 0
387 387 if 'chunkcachesize' in opts:
388 388 self._chunkcachesize = opts['chunkcachesize']
389 389 if 'maxchainlen' in opts:
390 390 self._maxchainlen = opts['maxchainlen']
391 391 if 'deltabothparents' in opts:
392 392 self._deltabothparents = opts['deltabothparents']
393 393 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
394 394 if 'compengine' in opts:
395 395 self._compengine = opts['compengine']
396 396 if 'maxdeltachainspan' in opts:
397 397 self._maxdeltachainspan = opts['maxdeltachainspan']
398 398 if mmaplargeindex and 'mmapindexthreshold' in opts:
399 399 mmapindexthreshold = opts['mmapindexthreshold']
400 400 self._sparserevlog = bool(opts.get('sparse-revlog', False))
401 401 withsparseread = bool(opts.get('with-sparse-read', False))
402 402 # sparse-revlog forces sparse-read
403 403 self._withsparseread = self._sparserevlog or withsparseread
404 404 if 'sparse-read-density-threshold' in opts:
405 405 self._srdensitythreshold = opts['sparse-read-density-threshold']
406 406 if 'sparse-read-min-gap-size' in opts:
407 407 self._srmingapsize = opts['sparse-read-min-gap-size']
408 408 if opts.get('enableellipsis'):
409 409 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
410 410
411 411 if self._chunkcachesize <= 0:
412 412 raise error.RevlogError(_('revlog chunk cache size %r is not '
413 413 'greater than 0') % self._chunkcachesize)
414 414 elif self._chunkcachesize & (self._chunkcachesize - 1):
415 415 raise error.RevlogError(_('revlog chunk cache size %r is not a '
416 416 'power of 2') % self._chunkcachesize)
417 417
418 418 indexdata = ''
419 419 self._initempty = True
420 420 try:
421 421 with self._indexfp() as f:
422 422 if (mmapindexthreshold is not None and
423 423 self.opener.fstat(f).st_size >= mmapindexthreshold):
424 424 indexdata = util.buffer(util.mmapread(f))
425 425 else:
426 426 indexdata = f.read()
427 427 if len(indexdata) > 0:
428 428 v = versionformat_unpack(indexdata[:4])[0]
429 429 self._initempty = False
430 430 except IOError as inst:
431 431 if inst.errno != errno.ENOENT:
432 432 raise
433 433
434 434 self.version = v
435 435 self._inline = v & FLAG_INLINE_DATA
436 436 self._generaldelta = v & FLAG_GENERALDELTA
437 437 flags = v & ~0xFFFF
438 438 fmt = v & 0xFFFF
439 439 if fmt == REVLOGV0:
440 440 if flags:
441 441 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
442 442 'revlog %s') %
443 443 (flags >> 16, fmt, self.indexfile))
444 444 elif fmt == REVLOGV1:
445 445 if flags & ~REVLOGV1_FLAGS:
446 446 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
447 447 'revlog %s') %
448 448 (flags >> 16, fmt, self.indexfile))
449 449 elif fmt == REVLOGV2:
450 450 if flags & ~REVLOGV2_FLAGS:
451 451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 452 'revlog %s') %
453 453 (flags >> 16, fmt, self.indexfile))
454 454 else:
455 455 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
456 456 (fmt, self.indexfile))
457 457
458 458 self._storedeltachains = True
459 459
460 460 self._io = revlogio()
461 461 if self.version == REVLOGV0:
462 462 self._io = revlogoldio()
463 463 try:
464 464 d = self._io.parseindex(indexdata, self._inline)
465 465 except (ValueError, IndexError):
466 466 raise error.RevlogError(_("index %s is corrupted") %
467 467 self.indexfile)
468 468 self.index, nodemap, self._chunkcache = d
469 469 if nodemap is not None:
470 470 self.nodemap = self._nodecache = nodemap
471 471 if not self._chunkcache:
472 472 self._chunkclear()
473 473 # revnum -> (chain-length, sum-delta-length)
474 474 self._chaininfocache = {}
475 475 # revlog header -> revlog compressor
476 476 self._decompressors = {}
477 477
478 478 @util.propertycache
479 479 def _compressor(self):
480 480 return util.compengines[self._compengine].revlogcompressor()
481 481
482 482 def _indexfp(self, mode='r'):
483 483 """file object for the revlog's index file"""
484 484 args = {r'mode': mode}
485 485 if mode != 'r':
486 486 args[r'checkambig'] = self._checkambig
487 487 if mode == 'w':
488 488 args[r'atomictemp'] = True
489 489 return self.opener(self.indexfile, **args)
490 490
491 491 def _datafp(self, mode='r'):
492 492 """file object for the revlog's data file"""
493 493 return self.opener(self.datafile, mode=mode)
494 494
495 495 @contextlib.contextmanager
496 496 def _datareadfp(self, existingfp=None):
497 497 """file object suitable to read data"""
498 498 if existingfp is not None:
499 499 yield existingfp
500 500 else:
501 501 if self._inline:
502 502 func = self._indexfp
503 503 else:
504 504 func = self._datafp
505 505 with func() as fp:
506 506 yield fp
507 507
508 508 def tip(self):
509 509 return self.node(len(self.index) - 1)
510 510 def __contains__(self, rev):
511 511 return 0 <= rev < len(self)
512 512 def __len__(self):
513 513 return len(self.index)
514 514 def __iter__(self):
515 515 return iter(pycompat.xrange(len(self)))
516 516 def revs(self, start=0, stop=None):
517 517 """iterate over all revs in this revlog (from start to stop)"""
518 518 return storageutil.iterrevs(len(self), start=start, stop=stop)
519 519
520 520 @util.propertycache
521 521 def nodemap(self):
522 522 if self.index:
523 523 # populate mapping down to the initial node
524 524 node0 = self.index[0][7] # get around changelog filtering
525 525 self.rev(node0)
526 526 return self._nodecache
527 527
528 528 def hasnode(self, node):
529 529 try:
530 530 self.rev(node)
531 531 return True
532 532 except KeyError:
533 533 return False
534 534
535 535 def candelta(self, baserev, rev):
536 536 """whether two revisions (baserev, rev) can be delta-ed or not"""
537 537 # Disable delta if either rev requires a content-changing flag
538 538 # processor (ex. LFS). This is because such a flag processor can alter
539 539 # the rawtext content that the delta will be based on, and two clients
540 540 # could have the same revlog node with different flags (i.e. different
541 541 # rawtext contents) and the delta could be incompatible.
542 542 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
543 543 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
544 544 return False
545 545 return True
546 546
547 547 def clearcaches(self):
548 548 self._revisioncache = None
549 549 self._chainbasecache.clear()
550 550 self._chunkcache = (0, '')
551 551 self._pcache = {}
552 552
553 553 try:
554 554 self._nodecache.clearcaches()
555 555 except AttributeError:
556 556 self._nodecache = {nullid: nullrev}
557 557 self._nodepos = None
558 558
559 559 def rev(self, node):
560 560 try:
561 561 return self._nodecache[node]
562 562 except TypeError:
563 563 raise
564 564 except error.RevlogError:
565 565 # parsers.c radix tree lookup failed
566 566 if node == wdirid or node in wdirfilenodeids:
567 567 raise error.WdirUnsupported
568 568 raise error.LookupError(node, self.indexfile, _('no node'))
569 569 except KeyError:
570 570 # pure python cache lookup failed
571 571 n = self._nodecache
572 572 i = self.index
573 573 p = self._nodepos
574 574 if p is None:
575 575 p = len(i) - 1
576 576 else:
577 577 assert p < len(i)
578 578 for r in pycompat.xrange(p, -1, -1):
579 579 v = i[r][7]
580 580 n[v] = r
581 581 if v == node:
582 582 self._nodepos = r - 1
583 583 return r
584 584 if node == wdirid or node in wdirfilenodeids:
585 585 raise error.WdirUnsupported
586 586 raise error.LookupError(node, self.indexfile, _('no node'))
587 587
588 588 # Accessors for index entries.
589 589
590 590 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
591 591 # are flags.
592 592 def start(self, rev):
593 593 return int(self.index[rev][0] >> 16)
594 594
595 595 def flags(self, rev):
596 596 return self.index[rev][0] & 0xFFFF
597 597
598 598 def length(self, rev):
599 599 return self.index[rev][1]
600 600
601 601 def rawsize(self, rev):
602 602 """return the length of the uncompressed text for a given revision"""
603 603 l = self.index[rev][2]
604 604 if l >= 0:
605 605 return l
606 606
607 607 t = self.revision(rev, raw=True)
608 608 return len(t)
609 609
610 610 def size(self, rev):
611 611 """length of non-raw text (processed by a "read" flag processor)"""
612 612 # fast path: if no "read" flag processor could change the content,
613 613 # size is rawsize. note: ELLIPSIS is known to not change the content.
614 614 flags = self.flags(rev)
615 615 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
616 616 return self.rawsize(rev)
617 617
618 618 return len(self.revision(rev, raw=False))
619 619
620 620 def chainbase(self, rev):
621 621 base = self._chainbasecache.get(rev)
622 622 if base is not None:
623 623 return base
624 624
625 625 index = self.index
626 626 iterrev = rev
627 627 base = index[iterrev][3]
628 628 while base != iterrev:
629 629 iterrev = base
630 630 base = index[iterrev][3]
631 631
632 632 self._chainbasecache[rev] = base
633 633 return base
634 634
635 635 def linkrev(self, rev):
636 636 return self.index[rev][4]
637 637
638 638 def parentrevs(self, rev):
639 639 try:
640 640 entry = self.index[rev]
641 641 except IndexError:
642 642 if rev == wdirrev:
643 643 raise error.WdirUnsupported
644 644 raise
645 645
646 646 return entry[5], entry[6]
647 647
648 648 def node(self, rev):
649 649 try:
650 650 return self.index[rev][7]
651 651 except IndexError:
652 652 if rev == wdirrev:
653 653 raise error.WdirUnsupported
654 654 raise
655 655
656 656 # Derived from index values.
657 657
658 658 def end(self, rev):
659 659 return self.start(rev) + self.length(rev)
660 660
661 661 def parents(self, node):
662 662 i = self.index
663 663 d = i[self.rev(node)]
664 664 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
665 665
666 666 def chainlen(self, rev):
667 667 return self._chaininfo(rev)[0]
668 668
669 669 def _chaininfo(self, rev):
670 670 chaininfocache = self._chaininfocache
671 671 if rev in chaininfocache:
672 672 return chaininfocache[rev]
673 673 index = self.index
674 674 generaldelta = self._generaldelta
675 675 iterrev = rev
676 676 e = index[iterrev]
677 677 clen = 0
678 678 compresseddeltalen = 0
679 679 while iterrev != e[3]:
680 680 clen += 1
681 681 compresseddeltalen += e[1]
682 682 if generaldelta:
683 683 iterrev = e[3]
684 684 else:
685 685 iterrev -= 1
686 686 if iterrev in chaininfocache:
687 687 t = chaininfocache[iterrev]
688 688 clen += t[0]
689 689 compresseddeltalen += t[1]
690 690 break
691 691 e = index[iterrev]
692 692 else:
693 693 # Add text length of base since decompressing that also takes
694 694 # work. For cache hits the length is already included.
695 695 compresseddeltalen += e[1]
696 696 r = (clen, compresseddeltalen)
697 697 chaininfocache[rev] = r
698 698 return r
699 699
700 700 def _deltachain(self, rev, stoprev=None):
701 701 """Obtain the delta chain for a revision.
702 702
703 703 ``stoprev`` specifies a revision to stop at. If not specified, we
704 704 stop at the base of the chain.
705 705
706 706 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
707 707 revs in ascending order and ``stopped`` is a bool indicating whether
708 708 ``stoprev`` was hit.
709 709 """
710 710 # Try C implementation.
711 711 try:
712 712 return self.index.deltachain(rev, stoprev, self._generaldelta)
713 713 except AttributeError:
714 714 pass
715 715
716 716 chain = []
717 717
718 718 # Alias to prevent attribute lookup in tight loop.
719 719 index = self.index
720 720 generaldelta = self._generaldelta
721 721
722 722 iterrev = rev
723 723 e = index[iterrev]
724 724 while iterrev != e[3] and iterrev != stoprev:
725 725 chain.append(iterrev)
726 726 if generaldelta:
727 727 iterrev = e[3]
728 728 else:
729 729 iterrev -= 1
730 730 e = index[iterrev]
731 731
732 732 if iterrev == stoprev:
733 733 stopped = True
734 734 else:
735 735 chain.append(iterrev)
736 736 stopped = False
737 737
738 738 chain.reverse()
739 739 return chain, stopped
740 740
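    # A sketch of what the fallback above computes (revision numbers and
    # bases assumed for illustration). With generaldelta, index[rev][3]
    # names the delta base; otherwise the base is implicitly rev - 1.
    # For a hypothetical generaldelta chain 0 <- 1 <- 4:
    #
    #   rl._deltachain(4)            -> ([0, 1, 4], False)
    #   rl._deltachain(4, stoprev=1) -> ([4], True)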
741 741 def ancestors(self, revs, stoprev=0, inclusive=False):
742 742 """Generate the ancestors of 'revs' in reverse topological order.
743 743 Does not generate revs lower than stoprev.
744 744
745 745 See the documentation for ancestor.lazyancestors for more details."""
746 746
747 747 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
748 748 inclusive=inclusive)
749 749
750 750 def descendants(self, revs):
751 751 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
752 752
753 753 def findcommonmissing(self, common=None, heads=None):
754 754 """Return a tuple of the ancestors of common and the ancestors of heads
755 755 that are not ancestors of common. In revset terminology, we return the
756 756 tuple:
757 757
758 758 ::common, (::heads) - (::common)
759 759
760 760 The list is sorted by revision number, meaning it is
761 761 topologically sorted.
762 762
763 763 'heads' and 'common' are both lists of node IDs. If heads is
764 764 not supplied, uses all of the revlog's heads. If common is not
765 765 supplied, uses nullid."""
766 766 if common is None:
767 767 common = [nullid]
768 768 if heads is None:
769 769 heads = self.heads()
770 770
771 771 common = [self.rev(n) for n in common]
772 772 heads = [self.rev(n) for n in heads]
773 773
774 774 # we want the ancestors, but inclusive
775 775 class lazyset(object):
776 776 def __init__(self, lazyvalues):
777 777 self.addedvalues = set()
778 778 self.lazyvalues = lazyvalues
779 779
780 780 def __contains__(self, value):
781 781 return value in self.addedvalues or value in self.lazyvalues
782 782
783 783 def __iter__(self):
784 784 added = self.addedvalues
785 785 for r in added:
786 786 yield r
787 787 for r in self.lazyvalues:
788 788 if r not in added:
789 789 yield r
790 790
791 791 def add(self, value):
792 792 self.addedvalues.add(value)
793 793
794 794 def update(self, values):
795 795 self.addedvalues.update(values)
796 796
797 797 has = lazyset(self.ancestors(common))
798 798 has.add(nullrev)
799 799 has.update(common)
800 800
801 801 # take all ancestors from heads that aren't in has
802 802 missing = set()
803 803 visit = collections.deque(r for r in heads if r not in has)
804 804 while visit:
805 805 r = visit.popleft()
806 806 if r in missing:
807 807 continue
808 808 else:
809 809 missing.add(r)
810 810 for p in self.parentrevs(r):
811 811 if p not in has:
812 812 visit.append(p)
813 813 missing = list(missing)
814 814 missing.sort()
815 815 return has, [self.node(miss) for miss in missing]
816 816
817 817 def incrementalmissingrevs(self, common=None):
818 818 """Return an object that can be used to incrementally compute the
819 819 revision numbers of the ancestors of arbitrary sets that are not
820 820 ancestors of common. This is an ancestor.incrementalmissingancestors
821 821 object.
822 822
823 823 'common' is a list of revision numbers. If common is not supplied, uses
824 824 nullrev.
825 825 """
826 826 if common is None:
827 827 common = [nullrev]
828 828
829 829 return ancestor.incrementalmissingancestors(self.parentrevs, common)
830 830
831 831 def findmissingrevs(self, common=None, heads=None):
832 832 """Return the revision numbers of the ancestors of heads that
833 833 are not ancestors of common.
834 834
835 835 More specifically, return a list of revision numbers corresponding to
836 836 nodes N such that every N satisfies the following constraints:
837 837
838 838 1. N is an ancestor of some node in 'heads'
839 839 2. N is not an ancestor of any node in 'common'
840 840
841 841 The list is sorted by revision number, meaning it is
842 842 topologically sorted.
843 843
844 844 'heads' and 'common' are both lists of revision numbers. If heads is
845 845 not supplied, uses all of the revlog's heads. If common is not
846 846 supplied, uses nullid."""
847 847 if common is None:
848 848 common = [nullrev]
849 849 if heads is None:
850 850 heads = self.headrevs()
851 851
852 852 inc = self.incrementalmissingrevs(common=common)
853 853 return inc.missingancestors(heads)
854 854
855 855 def findmissing(self, common=None, heads=None):
856 856 """Return the ancestors of heads that are not ancestors of common.
857 857
858 858 More specifically, return a list of nodes N such that every N
859 859 satisfies the following constraints:
860 860
861 861 1. N is an ancestor of some node in 'heads'
862 862 2. N is not an ancestor of any node in 'common'
863 863
864 864 The list is sorted by revision number, meaning it is
865 865 topologically sorted.
866 866
867 867 'heads' and 'common' are both lists of node IDs. If heads is
868 868 not supplied, uses all of the revlog's heads. If common is not
869 869 supplied, uses nullid."""
870 870 if common is None:
871 871 common = [nullid]
872 872 if heads is None:
873 873 heads = self.heads()
874 874
875 875 common = [self.rev(n) for n in common]
876 876 heads = [self.rev(n) for n in heads]
877 877
878 878 inc = self.incrementalmissingrevs(common=common)
879 879 return [self.node(r) for r in inc.missingancestors(heads)]
880 880
881 881 def nodesbetween(self, roots=None, heads=None):
882 882 """Return a topological path from 'roots' to 'heads'.
883 883
884 884 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
885 885 topologically sorted list of all nodes N that satisfy both of
886 886 these constraints:
887 887
888 888 1. N is a descendant of some node in 'roots'
889 889 2. N is an ancestor of some node in 'heads'
890 890
891 891 Every node is considered to be both a descendant and an ancestor
892 892 of itself, so every reachable node in 'roots' and 'heads' will be
893 893 included in 'nodes'.
894 894
895 895 'outroots' is the list of reachable nodes in 'roots', i.e., the
896 896 subset of 'roots' that is returned in 'nodes'. Likewise,
897 897 'outheads' is the subset of 'heads' that is also in 'nodes'.
898 898
899 899 'roots' and 'heads' are both lists of node IDs. If 'roots' is
900 900 unspecified, uses nullid as the only root. If 'heads' is
901 901 unspecified, uses list of all of the revlog's heads."""
902 902 nonodes = ([], [], [])
903 903 if roots is not None:
904 904 roots = list(roots)
905 905 if not roots:
906 906 return nonodes
907 907 lowestrev = min([self.rev(n) for n in roots])
908 908 else:
909 909 roots = [nullid] # Everybody's a descendant of nullid
910 910 lowestrev = nullrev
911 911 if (lowestrev == nullrev) and (heads is None):
912 912 # We want _all_ the nodes!
913 913 return ([self.node(r) for r in self], [nullid], list(self.heads()))
914 914 if heads is None:
915 915 # All nodes are ancestors, so the latest ancestor is the last
916 916 # node.
917 917 highestrev = len(self) - 1
918 918 # Set ancestors to None to signal that every node is an ancestor.
919 919 ancestors = None
920 920 # Set heads to an empty dictionary for later discovery of heads
921 921 heads = {}
922 922 else:
923 923 heads = list(heads)
924 924 if not heads:
925 925 return nonodes
926 926 ancestors = set()
927 927 # Turn heads into a dictionary so we can remove 'fake' heads.
928 928 # Also, later we will be using it to filter out the heads we can't
929 929 # find from roots.
930 930 heads = dict.fromkeys(heads, False)
931 931 # Start at the top and keep marking parents until we're done.
932 932 nodestotag = set(heads)
933 933 # Remember where the top was so we can use it as a limit later.
934 934 highestrev = max([self.rev(n) for n in nodestotag])
935 935 while nodestotag:
936 936 # grab a node to tag
937 937 n = nodestotag.pop()
938 938 # Never tag nullid
939 939 if n == nullid:
940 940 continue
941 941 # A node's revision number represents its place in a
942 942 # topologically sorted list of nodes.
943 943 r = self.rev(n)
944 944 if r >= lowestrev:
945 945 if n not in ancestors:
946 946 # If we are possibly a descendant of one of the roots
947 947 # and we haven't already been marked as an ancestor
948 948 ancestors.add(n) # Mark as ancestor
949 949 # Add non-nullid parents to list of nodes to tag.
950 950 nodestotag.update([p for p in self.parents(n) if
951 951 p != nullid])
952 952 elif n in heads: # We've seen it before, is it a fake head?
953 953 # So it is, real heads should not be the ancestors of
954 954 # any other heads.
955 955 heads.pop(n)
956 956 if not ancestors:
957 957 return nonodes
958 958 # Now that we have our set of ancestors, we want to remove any
959 959 # roots that are not ancestors.
960 960
961 961 # If one of the roots was nullid, everything is included anyway.
962 962 if lowestrev > nullrev:
963 963 # But, since we weren't, let's recompute the lowest rev to not
964 964 # include roots that aren't ancestors.
965 965
966 966 # Filter out roots that aren't ancestors of heads
967 967 roots = [root for root in roots if root in ancestors]
968 968 # Recompute the lowest revision
969 969 if roots:
970 970 lowestrev = min([self.rev(root) for root in roots])
971 971 else:
972 972 # No more roots? Return empty list
973 973 return nonodes
974 974 else:
975 975 # We are descending from nullid, and don't need to care about
976 976 # any other roots.
977 977 lowestrev = nullrev
978 978 roots = [nullid]
979 979 # Transform our roots list into a set.
980 980 descendants = set(roots)
981 981 # Also, keep the original roots so we can filter out roots that aren't
982 982 # 'real' roots (i.e. are descended from other roots).
983 983 roots = descendants.copy()
984 984 # Our topologically sorted list of output nodes.
985 985 orderedout = []
986 986 # Don't start at nullid since we don't want nullid in our output list,
987 987 # and if nullid shows up in descendants, empty parents will look like
988 988 # they're descendants.
989 989 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
990 990 n = self.node(r)
991 991 isdescendant = False
992 992 if lowestrev == nullrev: # Everybody is a descendant of nullid
993 993 isdescendant = True
994 994 elif n in descendants:
995 995 # n is already a descendant
996 996 isdescendant = True
997 997 # This check only needs to be done here because all the roots
998 998 # will start being marked as descendants before the loop.
999 999 if n in roots:
1000 1000 # If n was a root, check if it's a 'real' root.
1001 1001 p = tuple(self.parents(n))
1002 1002 # If any of its parents are descendants, it's not a root.
1003 1003 if (p[0] in descendants) or (p[1] in descendants):
1004 1004 roots.remove(n)
1005 1005 else:
1006 1006 p = tuple(self.parents(n))
1007 1007 # A node is a descendant if either of its parents are
1008 1008 # descendants. (We seeded the descendants set with the roots
1009 1009 # up there, remember?)
1010 1010 if (p[0] in descendants) or (p[1] in descendants):
1011 1011 descendants.add(n)
1012 1012 isdescendant = True
1013 1013 if isdescendant and ((ancestors is None) or (n in ancestors)):
1014 1014 # Only include nodes that are both descendants and ancestors.
1015 1015 orderedout.append(n)
1016 1016 if (ancestors is not None) and (n in heads):
1017 1017 # We're trying to figure out which heads are reachable
1018 1018 # from roots.
1019 1019 # Mark this head as having been reached
1020 1020 heads[n] = True
1021 1021 elif ancestors is None:
1022 1022 # Otherwise, we're trying to discover the heads.
1023 1023 # Assume this is a head because if it isn't, the next step
1024 1024 # will eventually remove it.
1025 1025 heads[n] = True
1026 1026 # But, obviously its parents aren't.
1027 1027 for p in self.parents(n):
1028 1028 heads.pop(p, None)
1029 1029 heads = [head for head, flag in heads.iteritems() if flag]
1030 1030 roots = list(roots)
1031 1031 assert orderedout
1032 1032 assert roots
1033 1033 assert heads
1034 1034 return (orderedout, roots, heads)
1035 1035
1036 1036 def headrevs(self):
1037 1037 try:
1038 1038 return self.index.headrevs()
1039 1039 except AttributeError:
1040 1040 return self._headrevs()
1041 1041
1042 1042 def computephases(self, roots):
1043 1043 return self.index.computephasesmapsets(roots)
1044 1044
1045 1045 def _headrevs(self):
1046 1046 count = len(self)
1047 1047 if not count:
1048 1048 return [nullrev]
1049 1049 # we won't iter over filtered revs so nobody is a head at start
1050 1050 ishead = [0] * (count + 1)
1051 1051 index = self.index
1052 1052 for r in self:
1053 1053 ishead[r] = 1 # I may be a head
1054 1054 e = index[r]
1055 1055 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1056 1056 return [r for r, val in enumerate(ishead) if val]
1057 1057
1058 1058 def heads(self, start=None, stop=None):
1059 1059 """return the list of all nodes that have no children
1060 1060
1061 1061 if start is specified, only heads that are descendants of
1062 1062 start will be returned
1063 1063 if stop is specified, it will consider all the revs from stop
1064 1064 as if they had no children
1065 1065 """
1066 1066 if start is None and stop is None:
1067 1067 if not len(self):
1068 1068 return [nullid]
1069 1069 return [self.node(r) for r in self.headrevs()]
1070 1070
1071 1071 if start is None:
1072 1072 start = nullrev
1073 1073 else:
1074 1074 start = self.rev(start)
1075 1075
1076 1076 stoprevs = set(self.rev(n) for n in stop or [])
1077 1077
1078 1078 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1079 1079 stoprevs=stoprevs)
1080 1080
1081 1081 return [self.node(rev) for rev in revs]
1082 1082
1083 1083 def children(self, node):
1084 1084 """find the children of a given node"""
1085 1085 c = []
1086 1086 p = self.rev(node)
1087 1087 for r in self.revs(start=p + 1):
1088 1088 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1089 1089 if prevs:
1090 1090 for pr in prevs:
1091 1091 if pr == p:
1092 1092 c.append(self.node(r))
1093 1093 elif p == nullrev:
1094 1094 c.append(self.node(r))
1095 1095 return c
1096 1096
1097 1097 def commonancestorsheads(self, a, b):
1098 1098 """calculate all the heads of the common ancestors of nodes a and b"""
1099 1099 a, b = self.rev(a), self.rev(b)
1100 1100 ancs = self._commonancestorsheads(a, b)
1101 1101 return pycompat.maplist(self.node, ancs)
1102 1102
1103 1103 def _commonancestorsheads(self, *revs):
1104 1104 """calculate all the heads of the common ancestors of revs"""
1105 1105 try:
1106 1106 ancs = self.index.commonancestorsheads(*revs)
1107 1107 except (AttributeError, OverflowError): # C implementation failed
1108 1108 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1109 1109 return ancs
1110 1110
1111 1111 def isancestor(self, a, b):
1112 1112 """return True if node a is an ancestor of node b
1113 1113
1114 1114 A revision is considered an ancestor of itself."""
1115 1115 a, b = self.rev(a), self.rev(b)
1116 1116 return self.isancestorrev(a, b)
1117 1117
1118 1118 def isancestorrev(self, a, b):
1119 1119 """return True if revision a is an ancestor of revision b
1120 1120
1121 1121 A revision is considered an ancestor of itself.
1122 1122
1123 1123 The implementation of this is trivial but the use of
1124 1124 commonancestorsheads is not."""
1125 1125 if a == nullrev:
1126 1126 return True
1127 1127 elif a == b:
1128 1128 return True
1129 1129 elif a > b:
1130 1130 return False
1131 1131 return a in self._commonancestorsheads(a, b)
1132 1132
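    # Why the membership test above suffices (reasoning sketch): if a is
    # an ancestor of b, the common ancestors of {a, b} are exactly ::a,
    # whose only head is a itself, so a appears in the result. If a is
    # not an ancestor of b, a is not a common ancestor at all and thus
    # cannot be one of the heads.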
1133 1133 def ancestor(self, a, b):
1134 1134 """calculate the "best" common ancestor of nodes a and b"""
1135 1135
1136 1136 a, b = self.rev(a), self.rev(b)
1137 1137 try:
1138 1138 ancs = self.index.ancestors(a, b)
1139 1139 except (AttributeError, OverflowError):
1140 1140 ancs = ancestor.ancestors(self.parentrevs, a, b)
1141 1141 if ancs:
1142 1142 # choose a consistent winner when there's a tie
1143 1143 return min(map(self.node, ancs))
1144 1144 return nullid
1145 1145
1146 1146 def _match(self, id):
1147 1147 if isinstance(id, int):
1148 1148 # rev
1149 1149 return self.node(id)
1150 1150 if len(id) == 20:
1151 1151 # possibly a binary node
1152 1152 # odds of a binary node being all hex in ASCII are 1 in 10**25
1153 1153 try:
1154 1154 node = id
1155 1155 self.rev(node) # quick search the index
1156 1156 return node
1157 1157 except error.LookupError:
1158 1158 pass # may be partial hex id
1159 1159 try:
1160 1160 # str(rev)
1161 1161 rev = int(id)
1162 1162 if "%d" % rev != id:
1163 1163 raise ValueError
1164 1164 if rev < 0:
1165 1165 rev = len(self) + rev
1166 1166 if rev < 0 or rev >= len(self):
1167 1167 raise ValueError
1168 1168 return self.node(rev)
1169 1169 except (ValueError, OverflowError):
1170 1170 pass
1171 1171 if len(id) == 40:
1172 1172 try:
1173 1173 # a full hex nodeid?
1174 1174 node = bin(id)
1175 1175 self.rev(node)
1176 1176 return node
1177 1177 except (TypeError, error.LookupError):
1178 1178 pass
1179 1179
1180 1180 def _partialmatch(self, id):
1181 1181 # we don't care about wdirfilenodeids as they should always be full hashes
1182 1182 maybewdir = wdirhex.startswith(id)
1183 1183 try:
1184 1184 partial = self.index.partialmatch(id)
1185 1185 if partial and self.hasnode(partial):
1186 1186 if maybewdir:
1187 1187 # single 'ff...' match in radix tree, ambiguous with wdir
1188 1188 raise error.RevlogError
1189 1189 return partial
1190 1190 if maybewdir:
1191 1191 # no 'ff...' match in radix tree, wdir identified
1192 1192 raise error.WdirUnsupported
1193 1193 return None
1194 1194 except error.RevlogError:
1195 1195 # parsers.c radix tree lookup gave multiple matches
1196 1196 # fast path: for unfiltered changelog, radix tree is accurate
1197 1197 if not getattr(self, 'filteredrevs', None):
1198 1198 raise error.AmbiguousPrefixLookupError(
1199 1199 id, self.indexfile, _('ambiguous identifier'))
1200 1200 # fall through to slow path that filters hidden revisions
1201 1201 except (AttributeError, ValueError):
1202 1202 # we are pure python, or key was too short to search radix tree
1203 1203 pass
1204 1204
1205 1205 if id in self._pcache:
1206 1206 return self._pcache[id]
1207 1207
1208 1208 if len(id) <= 40:
1209 1209 try:
1210 1210 # hex(node)[:...]
1211 1211 l = len(id) // 2 # grab an even number of digits
1212 1212 prefix = bin(id[:l * 2])
1213 1213 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1214 1214 nl = [n for n in nl if hex(n).startswith(id) and
1215 1215 self.hasnode(n)]
1216 1216 if nullhex.startswith(id):
1217 1217 nl.append(nullid)
1218 1218 if len(nl) > 0:
1219 1219 if len(nl) == 1 and not maybewdir:
1220 1220 self._pcache[id] = nl[0]
1221 1221 return nl[0]
1222 1222 raise error.AmbiguousPrefixLookupError(
1223 1223 id, self.indexfile, _('ambiguous identifier'))
1224 1224 if maybewdir:
1225 1225 raise error.WdirUnsupported
1226 1226 return None
1227 1227 except TypeError:
1228 1228 pass
1229 1229
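    # Note on the pure-Python fallback above: binary nodes can only be
    # compared on whole bytes, so an odd-length hex prefix such as
    # '1234f' is first narrowed with bin('1234') against each stored
    # node, then refined by the full hex comparison against the prefix.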
1230 1230 def lookup(self, id):
1231 1231 """locate a node based on:
1232 1232 - revision number or str(revision number)
1233 1233 - nodeid or subset of hex nodeid
1234 1234 """
1235 1235 n = self._match(id)
1236 1236 if n is not None:
1237 1237 return n
1238 1238 n = self._partialmatch(id)
1239 1239 if n:
1240 1240 return n
1241 1241
1242 1242 raise error.LookupError(id, self.indexfile, _('no match found'))
1243 1243
1244 1244 def shortest(self, node, minlength=1):
1245 1245 """Find the shortest unambiguous prefix that matches node."""
1246 1246 def isvalid(prefix):
1247 1247 try:
1248 1248 node = self._partialmatch(prefix)
1249 1249 except error.AmbiguousPrefixLookupError:
1250 1250 return False
1251 1251 except error.WdirUnsupported:
1252 1252 # single 'ff...' match
1253 1253 return True
1254 1254 if node is None:
1255 1255 raise error.LookupError(node, self.indexfile, _('no node'))
1256 1256 return True
1257 1257
1258 1258 def maybewdir(prefix):
1259 1259 return all(c == 'f' for c in prefix)
1260 1260
1261 1261 hexnode = hex(node)
1262 1262
1263 1263 def disambiguate(hexnode, minlength):
1264 1264 """Disambiguate against wdirid."""
1265 1265 for length in range(minlength, 41):
1266 1266 prefix = hexnode[:length]
1267 1267 if not maybewdir(prefix):
1268 1268 return prefix
1269 1269
1270 1270 if not getattr(self, 'filteredrevs', None):
1271 1271 try:
1272 1272 length = max(self.index.shortest(node), minlength)
1273 1273 return disambiguate(hexnode, length)
1274 1274 except error.RevlogError:
1275 1275 if node != wdirid:
1276 1276 raise error.LookupError(node, self.indexfile, _('no node'))
1277 1277 except AttributeError:
1278 1278 # Fall through to pure code
1279 1279 pass
1280 1280
1281 1281 if node == wdirid:
1282 1282 for length in range(minlength, 41):
1283 1283 prefix = hexnode[:length]
1284 1284 if isvalid(prefix):
1285 1285 return prefix
1286 1286
1287 1287 for length in range(minlength, 41):
1288 1288 prefix = hexnode[:length]
1289 1289 if isvalid(prefix):
1290 1290 return disambiguate(hexnode, length)
1291 1291
1292 1292 def cmp(self, node, text):
1293 1293 """compare text with a given file revision
1294 1294
1295 1295 returns True if text is different from what is stored.
1296 1296 """
1297 1297 p1, p2 = self.parents(node)
1298 1298 return storageutil.hashrevisionsha1(text, p1, p2) != node
1299 1299
1300 1300 def _cachesegment(self, offset, data):
1301 1301 """Add a segment to the revlog cache.
1302 1302
1303 1303 Accepts an absolute offset and the data that is at that location.
1304 1304 """
1305 1305 o, d = self._chunkcache
1306 1306 # try to add to existing cache
1307 1307 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1308 1308 self._chunkcache = o, d + data
1309 1309 else:
1310 1310 self._chunkcache = offset, data
1311 1311
1312 1312 def _readsegment(self, offset, length, df=None):
1313 1313 """Load a segment of raw data from the revlog.
1314 1314
1315 1315 Accepts an absolute offset, length to read, and an optional existing
1316 1316 file handle to read from.
1317 1317
1318 1318 If an existing file handle is passed, it will be seeked and the
1319 1319 original seek position will NOT be restored.
1320 1320
1321 1321 Returns a str or buffer of raw byte data.
1322 1322 """
1323 1323 # Cache data both forward and backward around the requested
1324 1324 # data, in a fixed size window. This helps speed up operations
1325 1325 # involving reading the revlog backwards.
1326 1326 cachesize = self._chunkcachesize
1327 1327 realoffset = offset & ~(cachesize - 1)
1328 1328 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1329 1329 - realoffset)
1330 1330 with self._datareadfp(df) as df:
1331 1331 df.seek(realoffset)
1332 1332 d = df.read(reallength)
1333 1333 self._cachesegment(realoffset, d)
1334 1334 if offset != realoffset or reallength != length:
1335 1335 return util.buffer(d, offset - realoffset, length)
1336 1336 return d
1337 1337
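    # Worked example of the window arithmetic above, assuming the
    # default 64k chunk cache size (a power of two): a request for 100
    # bytes at offset 70000 is widened to the aligned window
    # [65536, 131072):
    #
    #   realoffset = 70000 & ~65535                   -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536
    #              = 131072 - 65536                   -> 65536
    #
    # so a single aligned read serves nearby requests in both directions.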
1338 1338 def _getsegment(self, offset, length, df=None):
1339 1339 """Obtain a segment of raw data from the revlog.
1340 1340
1341 1341 Accepts an absolute offset, length of bytes to obtain, and an
1342 1342 optional file handle to the already-opened revlog. If the file
1343 1343 handle is used, its original seek position will not be preserved.
1344 1344
1345 1345 Requests for data may be returned from a cache.
1346 1346
1347 1347 Returns a str or a buffer instance of raw byte data.
1348 1348 """
1349 1349 o, d = self._chunkcache
1350 1350 l = len(d)
1351 1351
1352 1352 # is it in the cache?
1353 1353 cachestart = offset - o
1354 1354 cacheend = cachestart + length
1355 1355 if cachestart >= 0 and cacheend <= l:
1356 1356 if cachestart == 0 and cacheend == l:
1357 1357 return d # avoid a copy
1358 1358 return util.buffer(d, cachestart, cacheend - cachestart)
1359 1359
1360 1360 return self._readsegment(offset, length, df=df)
1361 1361
1362 1362 def _getsegmentforrevs(self, startrev, endrev, df=None):
1363 1363 """Obtain a segment of raw data corresponding to a range of revisions.
1364 1364
1365 1365 Accepts the start and end revisions and an optional already-open
1366 1366 file handle to be used for reading. If the file handle is used, its
1367 1367 seek position will not be preserved.
1368 1368
1369 1369 Requests for data may be satisfied by a cache.
1370 1370
1371 1371 Returns a 2-tuple of (offset, data) for the requested range of
1372 1372 revisions. Offset is the integer offset from the beginning of the
1373 1373 revlog and data is a str or buffer of the raw byte data.
1374 1374
1375 1375 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1376 1376 to determine where each revision's data begins and ends.
1377 1377 """
1378 1378 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1379 1379 # (functions are expensive).
1380 1380 index = self.index
1381 1381 istart = index[startrev]
1382 1382 start = int(istart[0] >> 16)
1383 1383 if startrev == endrev:
1384 1384 end = start + istart[1]
1385 1385 else:
1386 1386 iend = index[endrev]
1387 1387 end = int(iend[0] >> 16) + iend[1]
1388 1388
1389 1389 if self._inline:
1390 1390 start += (startrev + 1) * self._io.size
1391 1391 end += (endrev + 1) * self._io.size
1392 1392 length = end - start
1393 1393
1394 1394 return start, self._getsegment(start, length, df=df)
1395 1395
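    # Note on the inline adjustment above: in an inline revlog the data
    # chunks are interleaved with the index records in the .i file, so
    # the data of rev r sits after the (r + 1) records that precede it:
    #
    #   physical_start(rev) = start(rev) + (rev + 1) * self._io.size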
1396 1396 def _chunk(self, rev, df=None):
1397 1397 """Obtain a single decompressed chunk for a revision.
1398 1398
1399 1399 Accepts an integer revision and an optional already-open file handle
1400 1400 to be used for reading. If used, the seek position of the file will not
1401 1401 be preserved.
1402 1402
1403 1403 Returns a str holding uncompressed data for the requested revision.
1404 1404 """
1405 1405 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1406 1406
1407 1407 def _chunks(self, revs, df=None, targetsize=None):
1408 1408 """Obtain decompressed chunks for the specified revisions.
1409 1409
1410 1410 Accepts an iterable of numeric revisions that are assumed to be in
1411 1411 ascending order. Also accepts an optional already-open file handle
1412 1412 to be used for reading. If used, the seek position of the file will
1413 1413 not be preserved.
1414 1414
1415 1415 This function is similar to calling ``self._chunk()`` multiple times,
1416 1416 but is faster.
1417 1417
1418 1418 Returns a list with decompressed data for each requested revision.
1419 1419 """
1420 1420 if not revs:
1421 1421 return []
1422 1422 start = self.start
1423 1423 length = self.length
1424 1424 inline = self._inline
1425 1425 iosize = self._io.size
1426 1426 buffer = util.buffer
1427 1427
1428 1428 l = []
1429 1429 ladd = l.append
1430 1430
1431 1431 if not self._withsparseread:
1432 1432 slicedchunks = (revs,)
1433 1433 else:
1434 1434 slicedchunks = deltautil.slicechunk(self, revs,
1435 1435 targetsize=targetsize)
1436 1436
1437 1437 for revschunk in slicedchunks:
1438 1438 firstrev = revschunk[0]
1439 1439 # Skip trailing revisions with empty diff
1440 1440 for lastrev in revschunk[::-1]:
1441 1441 if length(lastrev) != 0:
1442 1442 break
1443 1443
1444 1444 try:
1445 1445 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1446 1446 except OverflowError:
1447 1447 # issue4215 - we can't cache a run of chunks greater than
1448 1448 # 2G on Windows
1449 1449 return [self._chunk(rev, df=df) for rev in revschunk]
1450 1450
1451 1451 decomp = self.decompress
1452 1452 for rev in revschunk:
1453 1453 chunkstart = start(rev)
1454 1454 if inline:
1455 1455 chunkstart += (rev + 1) * iosize
1456 1456 chunklength = length(rev)
1457 1457 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1458 1458
1459 1459 return l
1460 1460
1461 1461 def _chunkclear(self):
1462 1462 """Clear the raw chunk cache."""
1463 1463 self._chunkcache = (0, '')
1464 1464
1465 1465 def deltaparent(self, rev):
1466 1466 """return deltaparent of the given revision"""
1467 1467 base = self.index[rev][3]
1468 1468 if base == rev:
1469 1469 return nullrev
1470 1470 elif self._generaldelta:
1471 1471 return base
1472 1472 else:
1473 1473 return rev - 1
1474 1474
1475 1475 def issnapshot(self, rev):
1476 1476 """tells whether rev is a snapshot
1477 1477 """
1478 1478 if rev == nullrev:
1479 1479 return True
1480 1480 deltap = self.deltaparent(rev)
1481 1481 if deltap == nullrev:
1482 1482 return True
1483 1483 p1, p2 = self.parentrevs(rev)
1484 1484 if deltap in (p1, p2):
1485 1485 return False
1486 1486 return self.issnapshot(deltap)
1487 1487
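    # Sketch of the recursion above (hypothetical chains, d = delta
    # parent of the revision being tested):
    #
    #   d == nullrev (full text)                     -> snapshot
    #   d is p1 or p2                                -> not a snapshot
    #   d is neither parent and is itself a snapshot -> snapshot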
1488 1488 def snapshotdepth(self, rev):
1489 1489 """number of snapshots in the chain before this one"""
1490 1490 if not self.issnapshot(rev):
1491 1491 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1492 1492 return len(self._deltachain(rev)[0]) - 1
1493 1493
1494 1494 def revdiff(self, rev1, rev2):
1495 1495 """return or calculate a delta between two revisions
1496 1496
1497 1497 The delta calculated is in binary form and is intended to be written to
1498 1498 revlog data directly. So this function needs raw revision data.
1499 1499 """
1500 1500 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1501 1501 return bytes(self._chunk(rev2))
1502 1502
1503 1503 return mdiff.textdiff(self.revision(rev1, raw=True),
1504 1504 self.revision(rev2, raw=True))
1505 1505
1506 1506 def revision(self, nodeorrev, _df=None, raw=False):
1507 1507 """return an uncompressed revision of a given node or revision
1508 1508 number.
1509 1509
1510 1510 _df - an existing file handle to read from. (internal-only)
1511 1511 raw - an optional argument specifying if the revision data is to be
1512 1512 treated as raw data when applying flag transforms. 'raw' should be set
1513 1513 to True when generating changegroups or in debug commands.
1514 1514 """
1515 1515 if isinstance(nodeorrev, int):
1516 1516 rev = nodeorrev
1517 1517 node = self.node(rev)
1518 1518 else:
1519 1519 node = nodeorrev
1520 1520 rev = None
1521 1521
1522 1522 cachedrev = None
1523 1523 flags = None
1524 1524 rawtext = None
1525 1525 if node == nullid:
1526 1526 return ""
1527 1527 if self._revisioncache:
1528 1528 if self._revisioncache[0] == node:
1529 1529 # _revisioncache only stores rawtext
1530 1530 if raw:
1531 1531 return self._revisioncache[2]
1532 1532 # duplicated, but good for perf
1533 1533 if rev is None:
1534 1534 rev = self.rev(node)
1535 1535 if flags is None:
1536 1536 flags = self.flags(rev)
1537 1537 # no extra flags set, no flag processor runs, text = rawtext
1538 1538 if flags == REVIDX_DEFAULT_FLAGS:
1539 1539 return self._revisioncache[2]
1540 1540 # rawtext is reusable. need to run flag processor
1541 1541 rawtext = self._revisioncache[2]
1542 1542
1543 1543 cachedrev = self._revisioncache[1]
1544 1544
1545 1545 # look up what we need to read
1546 1546 if rawtext is None:
1547 1547 if rev is None:
1548 1548 rev = self.rev(node)
1549 1549
1550 1550 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1551 1551 if stopped:
1552 1552 rawtext = self._revisioncache[2]
1553 1553
1554 1554 # drop cache to save memory
1555 1555 self._revisioncache = None
1556 1556
1557 1557 targetsize = None
1558 1558 rawsize = self.index[rev][2]
1559 1559 if 0 <= rawsize:
1560 1560 targetsize = 4 * rawsize
1561 1561
1562 1562 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1563 1563 if rawtext is None:
1564 1564 rawtext = bytes(bins[0])
1565 1565 bins = bins[1:]
1566 1566
1567 1567 rawtext = mdiff.patches(rawtext, bins)
1568 1568 self._revisioncache = (node, rev, rawtext)
1569 1569
1570 1570 if flags is None:
1571 1571 if rev is None:
1572 1572 rev = self.rev(node)
1573 1573 flags = self.flags(rev)
1574 1574
1575 1575 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1576 1576 if validatehash:
1577 1577 self.checkhash(text, node, rev=rev)
1578 1578
1579 1579 return text
1580 1580
1581 1581 def hash(self, text, p1, p2):
1582 1582 """Compute a node hash.
1583 1583
1584 1584 Available as a function so that subclasses can replace the hash
1585 1585 as needed.
1586 1586 """
1587 1587 return storageutil.hashrevisionsha1(text, p1, p2)
1588 1588
1589 1589 def _processflags(self, text, flags, operation, raw=False):
1590 1590 """Inspect revision data flags and applies transforms defined by
1591 1591 registered flag processors.
1592 1592
1593 1593 ``text`` - the revision data to process
1594 1594 ``flags`` - the revision flags
1595 1595 ``operation`` - the operation being performed (read or write)
1596 1596 ``raw`` - an optional argument describing if the raw transform should be
1597 1597 applied.
1598 1598
1599 1599 This method processes the flags in the order (or reverse order if
1600 1600 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1601 1601 flag processors registered for present flags. The order of flags defined
1602 1602 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not commutative.
1603 1603
1604 1604 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1605 1605 processed text and ``validatehash`` is a bool indicating whether the
1606 1606 returned text should be checked for hash integrity.
1607 1607
1608 1608 Note: If the ``raw`` argument is set, it has precedence over the
1609 1609 operation and will only update the value of ``validatehash``.
1610 1610 """
1611 1611 # fast path: no flag processors will run
1612 1612 if flags == 0:
1613 1613 return text, True
1614 1614 if operation not in ('read', 'write'):
1615 1615 raise error.ProgrammingError(_("invalid '%s' operation") %
1616 1616 operation)
1617 1617 # Check all flags are known.
1618 1618 if flags & ~REVIDX_KNOWN_FLAGS:
1619 1619 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1620 1620 (flags & ~REVIDX_KNOWN_FLAGS))
1621 1621 validatehash = True
1622 1622 # Depending on the operation (read or write), the order might be
1623 1623 # reversed due to non-commutative transforms.
1624 1624 orderedflags = REVIDX_FLAGS_ORDER
1625 1625 if operation == 'write':
1626 1626 orderedflags = reversed(orderedflags)
1627 1627
1628 1628 for flag in orderedflags:
1629 1629 # If a flagprocessor has been registered for a known flag, apply the
1630 1630 # related operation transform and update result tuple.
1631 1631 if flag & flags:
1632 1632 vhash = True
1633 1633
1634 1634 if flag not in self._flagprocessors:
1635 1635 message = _("missing processor for flag '%#x'") % (flag)
1636 1636 raise error.RevlogError(message)
1637 1637
1638 1638 processor = self._flagprocessors[flag]
1639 1639 if processor is not None:
1640 1640 readtransform, writetransform, rawtransform = processor
1641 1641
1642 1642 if raw:
1643 1643 vhash = rawtransform(self, text)
1644 1644 elif operation == 'read':
1645 1645 text, vhash = readtransform(self, text)
1646 1646 else: # write operation
1647 1647 text, vhash = writetransform(self, text)
1648 1648 validatehash = validatehash and vhash
1649 1649
1650 1650 return text, validatehash
1651 1651
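A hedged sketch of the (read, write, raw) 3-tuple this method unpacks; the marker and transforms below are hypothetical, not a real revlog flag payload:

_PREFIX = b'\x01demo\x01'          # hypothetical marker

def _demoread(rl, text):
    # 'read' maps stored rawtext to the logical text; the second item
    # says whether the result should still be hash-checked
    return text[len(_PREFIX):], True

def _demowrite(rl, text):
    # 'write' is the inverse transform, applied in reversed flag order
    return _PREFIX + text, True

def _demoraw(rl, rawtext):
    # the raw path only decides whether rawtext is hash-checkable as-is
    return False

demoprocessor = (_demoread, _demowrite, _demoraw)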
1652 1652 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1653 1653 """Check node hash integrity.
1654 1654
1655 1655 Available as a function so that subclasses can extend hash mismatch
1656 1656 behaviors as needed.
1657 1657 """
1658 1658 try:
1659 1659 if p1 is None and p2 is None:
1660 1660 p1, p2 = self.parents(node)
1661 1661 if node != self.hash(text, p1, p2):
1662 # Clear the revision cache on hash failure. The revision cache
1663 # only stores the raw revision and clearing the cache does have
1664 # the side-effect that we won't have a cache hit when the raw
1665 # revision data is accessed. But this case should be rare and
1666 # it is extra work to teach the cache about the hash
1667 # verification state.
1668 if self._revisioncache and self._revisioncache[0] == node:
1669 self._revisioncache = None
1670
1662 1671 revornode = rev
1663 1672 if revornode is None:
1664 1673 revornode = templatefilters.short(hex(node))
1665 1674 raise error.RevlogError(_("integrity check failed on %s:%s")
1666 1675 % (self.indexfile, pycompat.bytestr(revornode)))
1667 1676 except error.RevlogError:
1668 1677 if self._censorable and storageutil.iscensoredtext(text):
1669 1678 raise error.CensoredNodeError(self.indexfile, node, text)
1670 1679 raise
1671 1680
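The cache eviction added above is an instance of a generic pattern: never leave a memoized value behind once it has failed validation. A minimal standalone sketch of that pattern (the names are illustrative, not revlog API):

def verifiedget(cache, key, load, check):
    data = cache.get(key)
    if data is None:
        data = cache[key] = load(key)
    if not check(key, data):
        # evict before raising so a later caller cannot get a cache
        # hit on data that already failed verification
        del cache[key]
        raise ValueError('integrity check failed on %r' % (key,))
    return data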
1672 1681 def _enforceinlinesize(self, tr, fp=None):
1673 1682 """Check if the revlog is too big for inline and convert if so.
1674 1683
1675 1684 This should be called after revisions are added to the revlog. If the
1676 1685 revlog has grown too large to be an inline revlog, it will convert it
1677 1686 to use multiple index and data files.
1678 1687 """
1679 1688 tiprev = len(self) - 1
1680 1689 if (not self._inline or
1681 1690 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1682 1691 return
1683 1692
1684 1693 trinfo = tr.find(self.indexfile)
1685 1694 if trinfo is None:
1686 1695 raise error.RevlogError(_("%s not found in the transaction")
1687 1696 % self.indexfile)
1688 1697
1689 1698 trindex = trinfo[2]
1690 1699 if trindex is not None:
1691 1700 dataoff = self.start(trindex)
1692 1701 else:
1693 1702 # revlog was stripped at start of transaction, use all leftover data
1694 1703 trindex = len(self) - 1
1695 1704 dataoff = self.end(tiprev)
1696 1705
1697 1706 tr.add(self.datafile, dataoff)
1698 1707
1699 1708 if fp:
1700 1709 fp.flush()
1701 1710 fp.close()
1702 1711
1703 1712 with self._datafp('w') as df:
1704 1713 for r in self:
1705 1714 df.write(self._getsegmentforrevs(r, r)[1])
1706 1715
1707 1716 with self._indexfp('w') as fp:
1708 1717 self.version &= ~FLAG_INLINE_DATA
1709 1718 self._inline = False
1710 1719 io = self._io
1711 1720 for i in self:
1712 1721 e = io.packentry(self.index[i], self.node, self.version, i)
1713 1722 fp.write(e)
1714 1723
1715 1724 # the temp file replaces the real index when we exit the context
1716 1725 # manager
1717 1726
1718 1727 tr.replace(self.indexfile, trindex * self._io.size)
1719 1728 self._chunkclear()
1720 1729
1721 1730 def _nodeduplicatecallback(self, transaction, node):
1722 1731 """called when trying to add a node already stored.
1723 1732 """
1724 1733
1725 1734 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1726 1735 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1727 1736 """add a revision to the log
1728 1737
1729 1738 text - the revision data to add
1730 1739 transaction - the transaction object used for rollback
1731 1740 link - the linkrev data to add
1732 1741 p1, p2 - the parent nodeids of the revision
1733 1742 cachedelta - an optional precomputed delta
1734 1743 node - nodeid of revision; typically node is not specified, and it is
1735 1744 computed by default as hash(text, p1, p2), however subclasses might
1736 1745 use different hashing method (and override checkhash() in such case)
1737 1746 flags - the known flags to set on the revision
1738 1747 deltacomputer - an optional deltacomputer instance shared between
1739 1748 multiple calls
1740 1749 """
1741 1750 if link == nullrev:
1742 1751 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1743 1752 % self.indexfile)
1744 1753
1745 1754 if flags:
1746 1755 node = node or self.hash(text, p1, p2)
1747 1756
1748 1757 rawtext, validatehash = self._processflags(text, flags, 'write')
1749 1758
1750 1759 # If the flag processor modifies the revision data, ignore any provided
1751 1760 # cachedelta.
1752 1761 if rawtext != text:
1753 1762 cachedelta = None
1754 1763
1755 1764 if len(rawtext) > _maxentrysize:
1756 1765 raise error.RevlogError(
1757 1766 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1758 1767 % (self.indexfile, len(rawtext)))
1759 1768
1760 1769 node = node or self.hash(rawtext, p1, p2)
1761 1770 if node in self.nodemap:
1762 1771 return node
1763 1772
1764 1773 if validatehash:
1765 1774 self.checkhash(rawtext, node, p1=p1, p2=p2)
1766 1775
1767 1776 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1768 1777 flags, cachedelta=cachedelta,
1769 1778 deltacomputer=deltacomputer)
1770 1779
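A hedged usage sketch of addrevision(); the revlog `rl`, open transaction `tr`, link revision and parent nodes are all assumed to exist:

node = rl.addrevision(b'file content\n', tr, linkrev, p1node, p2node)
# re-adding identical (text, p1, p2) is a no-op that returns the same
# node, via the `node in self.nodemap` early return above
assert rl.addrevision(b'file content\n', tr, linkrev, p1node, p2node) == node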
1771 1780 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1772 1781 cachedelta=None, deltacomputer=None):
1773 1782 """add a raw revision with known flags, node and parents
1774 1783 useful when reusing a revision not stored in this revlog (ex: received
1775 1784 over wire, or read from an external bundle).
1776 1785 """
1777 1786 dfh = None
1778 1787 if not self._inline:
1779 1788 dfh = self._datafp("a+")
1780 1789 ifh = self._indexfp("a+")
1781 1790 try:
1782 1791 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1783 1792 flags, cachedelta, ifh, dfh,
1784 1793 deltacomputer=deltacomputer)
1785 1794 finally:
1786 1795 if dfh:
1787 1796 dfh.close()
1788 1797 ifh.close()
1789 1798
1790 1799 def compress(self, data):
1791 1800 """Generate a possibly-compressed representation of data."""
1792 1801 if not data:
1793 1802 return '', data
1794 1803
1795 1804 compressed = self._compressor.compress(data)
1796 1805
1797 1806 if compressed:
1798 1807 # The revlog compressor added the header in the returned data.
1799 1808 return '', compressed
1800 1809
1801 1810 if data[0:1] == '\0':
1802 1811 return '', data
1803 1812 return 'u', data
1804 1813
1805 1814 def decompress(self, data):
1806 1815 """Decompress a revlog chunk.
1807 1816
1808 1817 The chunk is expected to begin with a header identifying the
1809 1818 format type so it can be routed to an appropriate decompressor.
1810 1819 """
1811 1820 if not data:
1812 1821 return data
1813 1822
1814 1823 # Revlogs are read much more frequently than they are written and many
1815 1824 # chunks only take microseconds to decompress, so performance is
1816 1825 # important here.
1817 1826 #
1818 1827 # We can make a few assumptions about revlogs:
1819 1828 #
1820 1829 # 1) the majority of chunks will be compressed (as opposed to inline
1821 1830 # raw data).
1822 1831 # 2) decompressing *any* data will likely be at least 10x slower than
1823 1832 # returning raw inline data.
1824 1833 # 3) we want to prioritize common and officially supported compression
1825 1834 # engines
1826 1835 #
1827 1836 # It follows that we want to optimize for "decompress compressed data
1828 1837 # when encoded with common and officially supported compression engines"
1829 1838 # case over "raw data" and "data encoded by less common or non-official
1830 1839 # compression engines." That is why we have the inline lookup first
1831 1840 # followed by the compengines lookup.
1832 1841 #
1833 1842 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1834 1843 # compressed chunks. And this matters for changelog and manifest reads.
1835 1844 t = data[0:1]
1836 1845
1837 1846 if t == 'x':
1838 1847 try:
1839 1848 return _zlibdecompress(data)
1840 1849 except zlib.error as e:
1841 1850 raise error.RevlogError(_('revlog decompress error: %s') %
1842 1851 stringutil.forcebytestr(e))
1843 1852 # '\0' is more common than 'u' so it goes first.
1844 1853 elif t == '\0':
1845 1854 return data
1846 1855 elif t == 'u':
1847 1856 return util.buffer(data, 1)
1848 1857
1849 1858 try:
1850 1859 compressor = self._decompressors[t]
1851 1860 except KeyError:
1852 1861 try:
1853 1862 engine = util.compengines.forrevlogheader(t)
1854 1863 compressor = engine.revlogcompressor()
1855 1864 self._decompressors[t] = compressor
1856 1865 except KeyError:
1857 1866 raise error.RevlogError(_('unknown compression type %r') % t)
1858 1867
1859 1868 return compressor.decompress(data)
1860 1869
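A self-contained sketch of the one-byte framing contract shared by compress() and decompress(); the real compress() delegates to a pluggable engine, so the size heuristic below is an assumption:

import zlib

def frame(data):
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp                # zlib output starts with 'x' (0x78)
    if data[:1] == b'\0':
        return data                # '\0' chunks are stored verbatim
    return b'u' + data             # 'u' marks uncompressed storage

def unframe(chunk):
    t = chunk[:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    return chunk[1:]               # t == 'u'

payload = b'some revision data' * 10
assert unframe(frame(payload)) == payload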
1861 1870 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1862 1871 cachedelta, ifh, dfh, alwayscache=False,
1863 1872 deltacomputer=None):
1864 1873 """internal function to add revisions to the log
1865 1874
1866 1875 see addrevision for argument descriptions.
1867 1876
1868 1877 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1869 1878
1870 1879 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1871 1880 be used.
1872 1881
1873 1882 invariants:
1874 1883 - rawtext is optional (can be None); if not set, cachedelta must be set.
1875 1884 If both are set, they must correspond to each other.
1876 1885 """
1877 1886 if node == nullid:
1878 1887 raise error.RevlogError(_("%s: attempt to add null revision") %
1879 1888 self.indexfile)
1880 1889 if node == wdirid or node in wdirfilenodeids:
1881 1890 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1882 1891 self.indexfile)
1883 1892
1884 1893 if self._inline:
1885 1894 fh = ifh
1886 1895 else:
1887 1896 fh = dfh
1888 1897
1889 1898 btext = [rawtext]
1890 1899
1891 1900 curr = len(self)
1892 1901 prev = curr - 1
1893 1902 offset = self.end(prev)
1894 1903 p1r, p2r = self.rev(p1), self.rev(p2)
1895 1904
1896 1905 # full versions are inserted when the needed deltas
1897 1906 # become comparable to the uncompressed text
1898 1907 if rawtext is None:
1899 1908 # need rawtext size, before changed by flag processors, which is
1900 1909 # the non-raw size. use revlog explicitly to avoid filelog's extra
1901 1910 # logic that might remove metadata size.
1902 1911 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1903 1912 cachedelta[1])
1904 1913 else:
1905 1914 textlen = len(rawtext)
1906 1915
1907 1916 if deltacomputer is None:
1908 1917 deltacomputer = deltautil.deltacomputer(self)
1909 1918
1910 1919 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1911 1920
1912 1921 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1913 1922
1914 1923 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1915 1924 deltainfo.base, link, p1r, p2r, node)
1916 1925 self.index.append(e)
1917 1926 self.nodemap[node] = curr
1918 1927
1919 1928 entry = self._io.packentry(e, self.node, self.version, curr)
1920 1929 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1921 1930 link, offset)
1922 1931
1923 1932 rawtext = btext[0]
1924 1933
1925 1934 if alwayscache and rawtext is None:
1926 1935 rawtext = deltacomputer.buildtext(revinfo, fh)
1927 1936
1928 1937 if type(rawtext) == bytes: # only accept immutable objects
1929 1938 self._revisioncache = (node, curr, rawtext)
1930 1939 self._chainbasecache[curr] = deltainfo.chainbase
1931 1940 return node
1932 1941
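For reference, the 8-tuple appended to self.index above, in field order (the same order it is unpacked elsewhere in this file):

# (offset_type(offset, flags),  # byte offset and flags packed together
#  deltainfo.deltalen,          # length of the stored (compressed) chunk
#  textlen,                     # uncompressed rawtext length
#  deltainfo.base,              # revision this delta is against
#  link,                        # linkrev into the changelog
#  p1r, p2r,                    # parent revision numbers
#  node)                        # binary node id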
1933 1942 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1934 1943 # Files opened in a+ mode have inconsistent behavior on various
1935 1944 # platforms. Windows requires that a file positioning call be made
1936 1945 # when the file handle transitions between reads and writes. See
1937 1946 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1938 1947 # platforms, Python or the platform itself can be buggy. Some versions
1939 1948 # of Solaris have been observed to not append at the end of the file
1940 1949 # if the file was seeked to before the end. See issue4943 for more.
1941 1950 #
1942 1951 # We work around this issue by inserting a seek() before writing.
1943 1952 # Note: This is likely not necessary on Python 3.
1944 1953 ifh.seek(0, os.SEEK_END)
1945 1954 if dfh:
1946 1955 dfh.seek(0, os.SEEK_END)
1947 1956
1948 1957 curr = len(self) - 1
1949 1958 if not self._inline:
1950 1959 transaction.add(self.datafile, offset)
1951 1960 transaction.add(self.indexfile, curr * len(entry))
1952 1961 if data[0]:
1953 1962 dfh.write(data[0])
1954 1963 dfh.write(data[1])
1955 1964 ifh.write(entry)
1956 1965 else:
1957 1966 offset += curr * self._io.size
1958 1967 transaction.add(self.indexfile, offset, curr)
1959 1968 ifh.write(entry)
1960 1969 ifh.write(data[0])
1961 1970 ifh.write(data[1])
1962 1971 self._enforceinlinesize(transaction, ifh)
1963 1972
1964 1973 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
1965 1974 """
1966 1975 add a delta group
1967 1976
1968 1977 given a set of deltas, add them to the revision log. the
1969 1978 first delta is against its parent, which should be in our
1970 1979 log, the rest are against the previous delta.
1971 1980
1972 1981 If ``addrevisioncb`` is defined, it will be called with arguments of
1973 1982 this revlog and the node that was added.
1974 1983 """
1975 1984
1976 1985 nodes = []
1977 1986
1978 1987 r = len(self)
1979 1988 end = 0
1980 1989 if r:
1981 1990 end = self.end(r - 1)
1982 1991 ifh = self._indexfp("a+")
1983 1992 isize = r * self._io.size
1984 1993 if self._inline:
1985 1994 transaction.add(self.indexfile, end + isize, r)
1986 1995 dfh = None
1987 1996 else:
1988 1997 transaction.add(self.indexfile, isize, r)
1989 1998 transaction.add(self.datafile, end)
1990 1999 dfh = self._datafp("a+")
1991 2000 def flush():
1992 2001 if dfh:
1993 2002 dfh.flush()
1994 2003 ifh.flush()
1995 2004 try:
1996 2005 deltacomputer = deltautil.deltacomputer(self)
1997 2006 # loop through our set of deltas
1998 2007 for data in deltas:
1999 2008 node, p1, p2, linknode, deltabase, delta, flags = data
2000 2009 link = linkmapper(linknode)
2001 2010 flags = flags or REVIDX_DEFAULT_FLAGS
2002 2011
2003 2012 nodes.append(node)
2004 2013
2005 2014 if node in self.nodemap:
2006 2015 self._nodeduplicatecallback(transaction, node)
2007 2016 # this can happen if two branches make the same change
2008 2017 continue
2009 2018
2010 2019 for p in (p1, p2):
2011 2020 if p not in self.nodemap:
2012 2021 raise error.LookupError(p, self.indexfile,
2013 2022 _('unknown parent'))
2014 2023
2015 2024 if deltabase not in self.nodemap:
2016 2025 raise error.LookupError(deltabase, self.indexfile,
2017 2026 _('unknown delta base'))
2018 2027
2019 2028 baserev = self.rev(deltabase)
2020 2029
2021 2030 if baserev != nullrev and self.iscensored(baserev):
2022 2031 # if base is censored, delta must be full replacement in a
2023 2032 # single patch operation
2024 2033 hlen = struct.calcsize(">lll")
2025 2034 oldlen = self.rawsize(baserev)
2026 2035 newlen = len(delta) - hlen
2027 2036 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2028 2037 raise error.CensoredBaseError(self.indexfile,
2029 2038 self.node(baserev))
2030 2039
2031 2040 if not flags and self._peek_iscensored(baserev, delta, flush):
2032 2041 flags |= REVIDX_ISCENSORED
2033 2042
2034 2043 # We assume consumers of addrevisioncb will want to retrieve
2035 2044 # the added revision, which will require a call to
2036 2045 # revision(). revision() will fast path if there is a cache
2037 2046 # hit. So, we tell _addrevision() to always cache in this case.
2038 2047 # We're only using addgroup() in the context of changegroup
2039 2048 # generation so the revision data can always be handled as raw
2040 2049 # by the flagprocessor.
2041 2050 self._addrevision(node, None, transaction, link,
2042 2051 p1, p2, flags, (baserev, delta),
2043 2052 ifh, dfh,
2044 2053 alwayscache=bool(addrevisioncb),
2045 2054 deltacomputer=deltacomputer)
2046 2055
2047 2056 if addrevisioncb:
2048 2057 addrevisioncb(self, node)
2049 2058
2050 2059 if not dfh and not self._inline:
2051 2060 # addrevision switched from inline to conventional
2052 2061 # reopen the index
2053 2062 ifh.close()
2054 2063 dfh = self._datafp("a+")
2055 2064 ifh = self._indexfp("a+")
2056 2065 finally:
2057 2066 if dfh:
2058 2067 dfh.close()
2059 2068 ifh.close()
2060 2069
2061 2070 return nodes
2062 2071
2063 2072 def iscensored(self, rev):
2064 2073 """Check if a file revision is censored."""
2065 2074 if not self._censorable:
2066 2075 return False
2067 2076
2068 2077 return self.flags(rev) & REVIDX_ISCENSORED
2069 2078
2070 2079 def _peek_iscensored(self, baserev, delta, flush):
2071 2080 """Quickly check if a delta produces a censored revision."""
2072 2081 if not self._censorable:
2073 2082 return False
2074 2083
2075 2084 # Fragile heuristic: unless new file meta keys are added alphabetically
2076 2085 # preceding "censored", all censored revisions are prefixed by
2077 2086 # "\1\ncensored:". A delta producing such a censored revision must be a
2078 2087 # full-replacement delta, so we inspect the first and only patch in the
2079 2088 # delta for this prefix.
2080 2089 hlen = struct.calcsize(">lll")
2081 2090 if len(delta) <= hlen:
2082 2091 return False
2083 2092
2084 2093 oldlen = self.rawsize(baserev)
2085 2094 newlen = len(delta) - hlen
2086 2095 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2087 2096 return False
2088 2097
2089 2098 add = "\1\ncensored:"
2090 2099 addlen = len(add)
2091 2100 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2092 2101
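A sketch of the full-replacement test used above; it assumes mdiff.replacediffheader(oldlen, newlen) packs a single '>lll' hunk header of (start=0, end=oldlen, newlength):

import struct

HLEN = struct.calcsize(">lll")     # 12 bytes: (start, end, newlength)

def isfullreplacement(delta, oldlen):
    if len(delta) <= HLEN:
        return False
    start, end, newlen = struct.unpack(">lll", delta[:HLEN])
    # one hunk replacing old bytes [0, oldlen) with everything after
    # the header
    return (start, end, newlen) == (0, oldlen, len(delta) - HLEN)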
2093 2102 def getstrippoint(self, minlink):
2094 2103 """find the minimum rev that must be stripped to strip the linkrev
2095 2104
2096 2105 Returns a tuple containing the minimum rev and a set of all revs that
2097 2106 have linkrevs that will be broken by this strip.
2098 2107 """
2099 2108 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2100 2109 self.headrevs(),
2101 2110 self.linkrev, self.parentrevs)
2102 2111
2103 2112 def strip(self, minlink, transaction):
2104 2113 """truncate the revlog on the first revision with a linkrev >= minlink
2105 2114
2106 2115 This function is called when we're stripping revision minlink and
2107 2116 its descendants from the repository.
2108 2117
2109 2118 We have to remove all revisions with linkrev >= minlink, because
2110 2119 the equivalent changelog revisions will be renumbered after the
2111 2120 strip.
2112 2121
2113 2122 So we truncate the revlog on the first of these revisions, and
2114 2123 trust that the caller has saved the revisions that shouldn't be
2115 2124 removed and that it'll re-add them after this truncation.
2116 2125 """
2117 2126 if len(self) == 0:
2118 2127 return
2119 2128
2120 2129 rev, _ = self.getstrippoint(minlink)
2121 2130 if rev == len(self):
2122 2131 return
2123 2132
2124 2133 # first truncate the files on disk
2125 2134 end = self.start(rev)
2126 2135 if not self._inline:
2127 2136 transaction.add(self.datafile, end)
2128 2137 end = rev * self._io.size
2129 2138 else:
2130 2139 end += rev * self._io.size
2131 2140
2132 2141 transaction.add(self.indexfile, end)
2133 2142
2134 2143 # then reset internal state in memory to forget those revisions
2135 2144 self._revisioncache = None
2136 2145 self._chaininfocache = {}
2137 2146 self._chunkclear()
2138 2147 for x in pycompat.xrange(rev, len(self)):
2139 2148 del self.nodemap[self.node(x)]
2140 2149
2141 2150 del self.index[rev:-1]
2142 2151 self._nodepos = None
2143 2152
2144 2153 def checksize(self):
2145 2154 expected = 0
2146 2155 if len(self):
2147 2156 expected = max(0, self.end(len(self) - 1))
2148 2157
2149 2158 try:
2150 2159 with self._datafp() as f:
2151 2160 f.seek(0, 2)
2152 2161 actual = f.tell()
2153 2162 dd = actual - expected
2154 2163 except IOError as inst:
2155 2164 if inst.errno != errno.ENOENT:
2156 2165 raise
2157 2166 dd = 0
2158 2167
2159 2168 try:
2160 2169 f = self.opener(self.indexfile)
2161 2170 f.seek(0, 2)
2162 2171 actual = f.tell()
2163 2172 f.close()
2164 2173 s = self._io.size
2165 2174 i = max(0, actual // s)
2166 2175 di = actual - (i * s)
2167 2176 if self._inline:
2168 2177 databytes = 0
2169 2178 for r in self:
2170 2179 databytes += max(0, self.length(r))
2171 2180 dd = 0
2172 2181 di = actual - len(self) * s - databytes
2173 2182 except IOError as inst:
2174 2183 if inst.errno != errno.ENOENT:
2175 2184 raise
2176 2185 di = 0
2177 2186
2178 2187 return (dd, di)
2179 2188
2180 2189 def files(self):
2181 2190 res = [self.indexfile]
2182 2191 if not self._inline:
2183 2192 res.append(self.datafile)
2184 2193 return res
2185 2194
2186 2195 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2187 2196 assumehaveparentrevisions=False, deltaprevious=False):
2188 2197 if nodesorder not in ('nodes', 'storage', None):
2189 2198 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2190 2199 nodesorder)
2191 2200
2192 2201 if nodesorder is None and not self._generaldelta:
2193 2202 nodesorder = 'storage'
2194 2203
2195 2204 return storageutil.emitrevisions(
2196 2205 self, nodes, nodesorder, revlogrevisiondelta,
2197 2206 deltaparentfn=self.deltaparent,
2198 2207 candeltafn=self.candelta,
2199 2208 rawsizefn=self.rawsize,
2200 2209 revdifffn=self.revdiff,
2201 2210 flagsfn=self.flags,
2202 2211 sendfulltext=not self._storedeltachains,
2203 2212 revisiondata=revisiondata,
2204 2213 assumehaveparentrevisions=assumehaveparentrevisions,
2205 2214 deltaprevious=deltaprevious)
2206 2215
2207 2216 DELTAREUSEALWAYS = 'always'
2208 2217 DELTAREUSESAMEREVS = 'samerevs'
2209 2218 DELTAREUSENEVER = 'never'
2210 2219
2211 2220 DELTAREUSEFULLADD = 'fulladd'
2212 2221
2213 2222 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2214 2223
2215 2224 def clone(self, tr, destrevlog, addrevisioncb=None,
2216 2225 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2217 2226 """Copy this revlog to another, possibly with format changes.
2218 2227
2219 2228 The destination revlog will contain the same revisions and nodes.
2220 2229 However, it may not be bit-for-bit identical due to e.g. delta encoding
2221 2230 differences.
2222 2231
2223 2232 The ``deltareuse`` argument controls how deltas from the existing revlog
2224 2233 are preserved in the destination revlog. The argument can have the
2225 2234 following values:
2226 2235
2227 2236 DELTAREUSEALWAYS
2228 2237 Deltas will always be reused (if possible), even if the destination
2229 2238 revlog would not select the same revisions for the delta. This is the
2230 2239 fastest mode of operation.
2231 2240 DELTAREUSESAMEREVS
2232 2241 Deltas will be reused if the destination revlog would pick the same
2233 2242 revisions for the delta. This mode strikes a balance between speed
2234 2243 and optimization.
2235 2244 DELTAREUSENEVER
2236 2245 Deltas will never be reused. This is the slowest mode of execution.
2237 2246 This mode can be used to recompute deltas (e.g. if the diff/delta
2238 2247 algorithm changes).
2239 2248
2240 2249 Delta computation can be slow, so the choice of delta reuse policy can
2241 2250 significantly affect run time.
2242 2251
2243 2252 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2244 2253 two extremes. Deltas will be reused if they are appropriate. But if the
2245 2254 delta could choose a better revision, it will do so. This means if you
2246 2255 are converting a non-generaldelta revlog to a generaldelta revlog,
2247 2256 deltas will be recomputed if the delta's parent isn't a parent of the
2248 2257 revision.
2249 2258
2250 2259 In addition to the delta policy, the ``deltabothparents`` argument
2251 2260 controls whether to compute deltas against both parents for merges.
2252 2261 If it is None, the destination revlog's existing setting is kept.
2253 2262 """
2254 2263 if deltareuse not in self.DELTAREUSEALL:
2255 2264 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2256 2265
2257 2266 if len(destrevlog):
2258 2267 raise ValueError(_('destination revlog is not empty'))
2259 2268
2260 2269 if getattr(self, 'filteredrevs', None):
2261 2270 raise ValueError(_('source revlog has filtered revisions'))
2262 2271 if getattr(destrevlog, 'filteredrevs', None):
2263 2272 raise ValueError(_('destination revlog has filtered revisions'))
2264 2273
2265 2274 # lazydeltabase controls whether to reuse a cached delta, if possible.
2266 2275 oldlazydeltabase = destrevlog._lazydeltabase
2267 2276 oldamd = destrevlog._deltabothparents
2268 2277
2269 2278 try:
2270 2279 if deltareuse == self.DELTAREUSEALWAYS:
2271 2280 destrevlog._lazydeltabase = True
2272 2281 elif deltareuse == self.DELTAREUSESAMEREVS:
2273 2282 destrevlog._lazydeltabase = False
2274 2283
2275 2284 destrevlog._deltabothparents = deltabothparents or oldamd
2276 2285
2277 2286 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2278 2287 self.DELTAREUSESAMEREVS)
2279 2288
2280 2289 deltacomputer = deltautil.deltacomputer(destrevlog)
2281 2290 index = self.index
2282 2291 for rev in self:
2283 2292 entry = index[rev]
2284 2293
2285 2294 # Some classes override linkrev to take filtered revs into
2286 2295 # account. Use raw entry from index.
2287 2296 flags = entry[0] & 0xffff
2288 2297 linkrev = entry[4]
2289 2298 p1 = index[entry[5]][7]
2290 2299 p2 = index[entry[6]][7]
2291 2300 node = entry[7]
2292 2301
2293 2302 # (Possibly) reuse the delta from the revlog if allowed and
2294 2303 # the revlog chunk is a delta.
2295 2304 cachedelta = None
2296 2305 rawtext = None
2297 2306 if populatecachedelta:
2298 2307 dp = self.deltaparent(rev)
2299 2308 if dp != nullrev:
2300 2309 cachedelta = (dp, bytes(self._chunk(rev)))
2301 2310
2302 2311 if not cachedelta:
2303 2312 rawtext = self.revision(rev, raw=True)
2304 2313
2305 2314
2306 2315 if deltareuse == self.DELTAREUSEFULLADD:
2307 2316 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2308 2317 cachedelta=cachedelta,
2309 2318 node=node, flags=flags,
2310 2319 deltacomputer=deltacomputer)
2311 2320 else:
2312 2321 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2313 2322 checkambig=False)
2314 2323 dfh = None
2315 2324 if not destrevlog._inline:
2316 2325 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2317 2326 try:
2318 2327 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2319 2328 p2, flags, cachedelta, ifh, dfh,
2320 2329 deltacomputer=deltacomputer)
2321 2330 finally:
2322 2331 if dfh:
2323 2332 dfh.close()
2324 2333 ifh.close()
2325 2334
2326 2335 if addrevisioncb:
2327 2336 addrevisioncb(self, rev, node)
2328 2337 finally:
2329 2338 destrevlog._lazydeltabase = oldlazydeltabase
2330 2339 destrevlog._deltabothparents = oldamd
2331 2340
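A hedged usage sketch of clone(); the source/destination revlogs and the open transaction are assumed:

srclog.clone(tr, dstlog,
             deltareuse=srclog.DELTAREUSESAMEREVS,  # the documented default
             deltabothparents=None)  # None keeps the destination's setting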
2332 2341 def censorrevision(self, node, tombstone=b''):
2333 2342 if (self.version & 0xFFFF) == REVLOGV0:
2334 2343 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2335 2344 self.version)
2336 2345
2337 2346 rev = self.rev(node)
2338 2347 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2339 2348
2340 2349 if len(tombstone) > self.rawsize(rev):
2341 2350 raise error.Abort(_('censor tombstone must be no longer than '
2342 2351 'censored data'))
2343 2352
2344 2353 # Using two files instead of one makes it easy to rewrite entry-by-entry
2345 2354 idxread = self.opener(self.indexfile, 'r')
2346 2355 idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
2347 2356 if self.version & FLAG_INLINE_DATA:
2348 2357 dataread, datawrite = idxread, idxwrite
2349 2358 else:
2350 2359 dataread = self.opener(self.datafile, 'r')
2351 2360 datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
2352 2361
2353 2362 # Copy all revlog data up to the entry to be censored.
2354 2363 offset = self.start(rev)
2355 2364
2356 2365 for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
2357 2366 idxwrite.write(chunk)
2358 2367 for chunk in util.filechunkiter(dataread, limit=offset):
2359 2368 datawrite.write(chunk)
2360 2369
2361 2370 def rewriteindex(r, newoffs, newdata=None):
2362 2371 """Rewrite the index entry with a new data offset and new data.
2363 2372
2364 2373 The newdata argument, if given, is a tuple of three positive
2365 2374 integers: (new compressed, new uncompressed, added flag bits).
2366 2375 """
2367 2376 offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
2368 2377 flags = gettype(offlags)
2369 2378 if newdata:
2370 2379 comp, uncomp, nflags = newdata
2371 2380 flags |= nflags
2372 2381 offlags = offset_type(newoffs, flags)
2373 2382 e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
2374 2383 idxwrite.write(self._io.packentry(e, None, self.version, r))
2375 2384 idxread.seek(self._io.size, 1)
2376 2385
2377 2386 def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
2378 2387 """Write the given fulltext with the given data offset.
2379 2388
2380 2389 Returns:
2381 2390 The integer number of data bytes written, for tracking data
2382 2391 offsets.
2383 2392 """
2384 2393 flag, compdata = self.compress(data)
2385 2394 newcomp = len(flag) + len(compdata)
2386 2395 rewriteindex(r, offs, (newcomp, len(data), nflags))
2387 2396 datawrite.write(flag)
2388 2397 datawrite.write(compdata)
2389 2398 dataread.seek(self.length(r), 1)
2390 2399 return newcomp
2391 2400
2392 2401 # Rewrite censored entry with (padded) tombstone data.
2393 2402 pad = ' ' * (self.rawsize(rev) - len(tombstone))
2394 2403 offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)
2395 2404
2396 2405 # Rewrite all following filelog revisions fixing up offsets and deltas.
2397 2406 for srev in pycompat.xrange(rev + 1, len(self)):
2398 2407 if rev in self.parentrevs(srev):
2399 2408 # Immediate children of censored node must be re-added as
2400 2409 # fulltext.
2401 2410 try:
2402 2411 revdata = self.revision(srev)
2403 2412 except error.CensoredNodeError as e:
2404 2413 revdata = e.tombstone
2405 2414 dlen = rewrite(srev, offset, revdata)
2406 2415 else:
2407 2416 # Copy any other revision data verbatim after fixing up the
2408 2417 # offset.
2409 2418 rewriteindex(srev, offset)
2410 2419 dlen = self.length(srev)
2411 2420 for chunk in util.filechunkiter(dataread, limit=dlen):
2412 2421 datawrite.write(chunk)
2413 2422 offset += dlen
2414 2423
2415 2424 idxread.close()
2416 2425 idxwrite.close()
2417 2426 if dataread is not idxread:
2418 2427 dataread.close()
2419 2428 datawrite.close()
2420 2429
2421 2430 def verifyintegrity(self, state):
2422 2431 """Verifies the integrity of the revlog.
2423 2432
2424 2433 Yields ``revlogproblem`` instances describing problems that are
2425 2434 found.
2426 2435 """
2427 2436 dd, di = self.checksize()
2428 2437 if dd:
2429 2438 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2430 2439 if di:
2431 2440 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2432 2441
2433 2442 version = self.version & 0xFFFF
2434 2443
2435 2444 # The verifier tells us what version revlog we should be.
2436 2445 if version != state['expectedversion']:
2437 2446 yield revlogproblem(
2438 2447 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2439 2448 (self.indexfile, version, state['expectedversion']))
2440 2449
2441 2450 state['skipread'] = set()
2442 2451
2443 2452 for rev in self:
2444 2453 node = self.node(rev)
2445 2454
2446 2455 # Verify contents. 4 cases to care about:
2447 2456 #
2448 2457 # common: the most common case
2449 2458 # rename: with a rename
2450 2459 # meta: file content starts with b'\1\n', the metadata
2451 2460 # header defined in filelog.py, but without a rename
2452 2461 # ext: content stored externally
2453 2462 #
2454 2463 # More formally, their differences are shown below:
2455 2464 #
2456 2465 # | common | rename | meta | ext
2457 2466 # -------------------------------------------------------
2458 2467 # flags() | 0 | 0 | 0 | not 0
2459 2468 # renamed() | False | True | False | ?
2460 2469 # rawtext[0:2]=='\1\n'| False | True | True | ?
2461 2470 #
2462 2471 # "rawtext" means the raw text stored in revlog data, which
2463 2472 # could be retrieved by "revision(rev, raw=True)". "text"
2464 2473 # mentioned below is "revision(rev, raw=False)".
2465 2474 #
2466 2475 # There are 3 different lengths stored physically:
2467 2476 # 1. L1: rawsize, stored in revlog index
2468 2477 # 2. L2: len(rawtext), stored in revlog data
2469 2478 # 3. L3: len(text), stored in revlog data if flags==0, or
2470 2479 # possibly somewhere else if flags!=0
2471 2480 #
2472 2481 # L1 should be equal to L2. L3 could be different from them.
2473 2482 # "text" may or may not affect commit hash depending on flag
2474 2483 # processors (see revlog.addflagprocessor).
2475 2484 #
2476 2485 # | common | rename | meta | ext
2477 2486 # -------------------------------------------------
2478 2487 # rawsize() | L1 | L1 | L1 | L1
2479 2488 # size() | L1 | L2-LM | L1(*) | L1 (?)
2480 2489 # len(rawtext) | L2 | L2 | L2 | L2
2481 2490 # len(text) | L2 | L2 | L2 | L3
2482 2491 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2483 2492 #
2484 2493 # LM: length of metadata, depending on rawtext
2485 2494 # (*): not ideal, see comment in filelog.size
2486 2495 # (?): could be "- len(meta)" if the resolved content has
2487 2496 # rename metadata
2488 2497 #
2489 2498 # Checks needed to be done:
2490 2499 # 1. length check: L1 == L2, in all cases.
2491 2500 # 2. hash check: depending on flag processor, we may need to
2492 2501 # use either "text" (external), or "rawtext" (in revlog).
2493 2502
2494 2503 try:
2495 2504 skipflags = state.get('skipflags', 0)
2496 2505 if skipflags:
2497 2506 skipflags &= self.flags(rev)
2498 2507
2499 2508 if skipflags:
2500 2509 state['skipread'].add(node)
2501 2510 else:
2502 2511 # Side-effect: read content and verify hash.
2503 2512 self.revision(node)
2504 2513
2505 2514 l1 = self.rawsize(rev)
2506 2515 l2 = len(self.revision(node, raw=True))
2507 2516
2508 2517 if l1 != l2:
2509 2518 yield revlogproblem(
2510 2519 error=_('unpacked size is %d, %d expected') % (l2, l1),
2511 2520 node=node)
2512 2521
2513 2522 except error.CensoredNodeError:
2514 2523 if state['erroroncensored']:
2515 2524 yield revlogproblem(error=_('censored file data'),
2516 2525 node=node)
2517 2526 state['skipread'].add(node)
2518 2527 except Exception as e:
2519 2528 yield revlogproblem(
2520 2529 error=_('unpacking %s: %s') % (short(node),
2521 2530 stringutil.forcebytestr(e)),
2522 2531 node=node)
2523 2532 state['skipread'].add(node)
2524 2533
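A sketch of driving verifyintegrity(); the state keys are the ones read above, while the values here are assumptions:

state = {
    'expectedversion': 1,      # mismatching revlog format yields a warning
    'skipflags': 0,            # flags whose revisions skip content reading
    'erroroncensored': True,   # report censored data as an error
}
problems = list(rl.verifyintegrity(state))
for p in problems:
    print(p.warning or p.error)   # a revlogproblem carries one or the other
# nodes whose content could not be checked accumulate in state['skipread']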
2525 2534 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2526 2535 revisionscount=False, trackedsize=False,
2527 2536 storedsize=False):
2528 2537 d = {}
2529 2538
2530 2539 if exclusivefiles:
2531 2540 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2532 2541 if not self._inline:
2533 2542 d['exclusivefiles'].append((self.opener, self.datafile))
2534 2543
2535 2544 if sharedfiles:
2536 2545 d['sharedfiles'] = []
2537 2546
2538 2547 if revisionscount:
2539 2548 d['revisionscount'] = len(self)
2540 2549
2541 2550 if trackedsize:
2542 2551 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2543 2552
2544 2553 if storedsize:
2545 2554 d['storedsize'] = sum(self.opener.stat(path).st_size
2546 2555 for path in self.files())
2547 2556
2548 2557 return d
@@ -1,1342 +1,1344 b''
1 1 # storage.py - Testing of storage primitives.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import unittest
11 11
12 12 from ..node import (
13 13 hex,
14 14 nullid,
15 15 nullrev,
16 16 )
17 17 from .. import (
18 18 error,
19 19 mdiff,
20 20 repository,
21 21 )
22 22 from ..utils import (
23 23 storageutil,
24 24 )
25 25
26 26 class basetestcase(unittest.TestCase):
27 27 if not getattr(unittest.TestCase, r'assertRaisesRegex', False):
28 28 assertRaisesRegex = (# camelcase-required
29 29 unittest.TestCase.assertRaisesRegexp)
30 30
31 31 class ifileindextests(basetestcase):
32 32 """Generic tests for the ifileindex interface.
33 33
34 34 All file storage backends for index data should conform to the tests in this
35 35 class.
36 36
37 37 Use ``makeifileindextests()`` to create an instance of this type.
38 38 """
39 39 def testempty(self):
40 40 f = self._makefilefn()
41 41 self.assertEqual(len(f), 0, 'new file store has 0 length by default')
42 42 self.assertEqual(list(f), [], 'iter yields nothing by default')
43 43
44 44 gen = iter(f)
45 45 with self.assertRaises(StopIteration):
46 46 next(gen)
47 47
48 48 # revs() should evaluate to an empty list.
49 49 self.assertEqual(list(f.revs()), [])
50 50
51 51 revs = iter(f.revs())
52 52 with self.assertRaises(StopIteration):
53 53 next(revs)
54 54
55 55 self.assertEqual(list(f.revs(start=20)), [])
56 56
57 57 # parents() and parentrevs() work with nullid/nullrev.
58 58 self.assertEqual(f.parents(nullid), (nullid, nullid))
59 59 self.assertEqual(f.parentrevs(nullrev), (nullrev, nullrev))
60 60
61 61 with self.assertRaises(error.LookupError):
62 62 f.parents(b'\x01' * 20)
63 63
64 64 for i in range(-5, 5):
65 65 if i == nullrev:
66 66 continue
67 67
68 68 with self.assertRaises(IndexError):
69 69 f.parentrevs(i)
70 70
71 71 # nullid/nullrev lookup always works.
72 72 self.assertEqual(f.rev(nullid), nullrev)
73 73 self.assertEqual(f.node(nullrev), nullid)
74 74
75 75 with self.assertRaises(error.LookupError):
76 76 f.rev(b'\x01' * 20)
77 77
78 78 for i in range(-5, 5):
79 79 if i == nullrev:
80 80 continue
81 81
82 82 with self.assertRaises(IndexError):
83 83 f.node(i)
84 84
85 85 self.assertEqual(f.lookup(nullid), nullid)
86 86 self.assertEqual(f.lookup(nullrev), nullid)
87 87 self.assertEqual(f.lookup(hex(nullid)), nullid)
88 88 self.assertEqual(f.lookup(b'%d' % nullrev), nullid)
89 89
90 90 with self.assertRaises(error.LookupError):
91 91 f.lookup(b'badvalue')
92 92
93 93 with self.assertRaises(error.LookupError):
94 94 f.lookup(hex(nullid)[0:12])
95 95
96 96 with self.assertRaises(error.LookupError):
97 97 f.lookup(b'-2')
98 98
99 99 with self.assertRaises(error.LookupError):
100 100 f.lookup(b'0')
101 101
102 102 with self.assertRaises(error.LookupError):
103 103 f.lookup(b'1')
104 104
105 105 with self.assertRaises(error.LookupError):
106 106 f.lookup(b'11111111111111111111111111111111111111')
107 107
108 108 for i in range(-5, 5):
109 109 if i == nullrev:
110 110 continue
111 111
112 112 with self.assertRaises(LookupError):
113 113 f.lookup(i)
114 114
115 115 self.assertEqual(f.linkrev(nullrev), nullrev)
116 116
117 117 for i in range(-5, 5):
118 118 if i == nullrev:
119 119 continue
120 120
121 121 with self.assertRaises(IndexError):
122 122 f.linkrev(i)
123 123
124 124 self.assertFalse(f.iscensored(nullrev))
125 125
126 126 for i in range(-5, 5):
127 127 if i == nullrev:
128 128 continue
129 129
130 130 with self.assertRaises(IndexError):
131 131 f.iscensored(i)
132 132
133 133 self.assertEqual(list(f.commonancestorsheads(nullid, nullid)), [])
134 134
135 135 with self.assertRaises(ValueError):
136 136 self.assertEqual(list(f.descendants([])), [])
137 137
138 138 self.assertEqual(list(f.descendants([nullrev])), [])
139 139
140 140 self.assertEqual(f.heads(), [nullid])
141 141 self.assertEqual(f.heads(nullid), [nullid])
142 142 self.assertEqual(f.heads(None, [nullid]), [nullid])
143 143 self.assertEqual(f.heads(nullid, [nullid]), [nullid])
144 144
145 145 self.assertEqual(f.children(nullid), [])
146 146
147 147 with self.assertRaises(error.LookupError):
148 148 f.children(b'\x01' * 20)
149 149
150 150 def testsinglerevision(self):
151 151 f = self._makefilefn()
152 152 with self._maketransactionfn() as tr:
153 153 node = f.add(b'initial', None, tr, 0, nullid, nullid)
154 154
155 155 self.assertEqual(len(f), 1)
156 156 self.assertEqual(list(f), [0])
157 157
158 158 gen = iter(f)
159 159 self.assertEqual(next(gen), 0)
160 160
161 161 with self.assertRaises(StopIteration):
162 162 next(gen)
163 163
164 164 self.assertEqual(list(f.revs()), [0])
165 165 self.assertEqual(list(f.revs(start=1)), [])
166 166 self.assertEqual(list(f.revs(start=0)), [0])
167 167 self.assertEqual(list(f.revs(stop=0)), [0])
168 168 self.assertEqual(list(f.revs(stop=1)), [0])
169 169 self.assertEqual(list(f.revs(1, 1)), [])
170 170 # TODO buggy
171 171 self.assertEqual(list(f.revs(1, 0)), [1, 0])
172 172 self.assertEqual(list(f.revs(2, 0)), [2, 1, 0])
173 173
174 174 self.assertEqual(f.parents(node), (nullid, nullid))
175 175 self.assertEqual(f.parentrevs(0), (nullrev, nullrev))
176 176
177 177 with self.assertRaises(error.LookupError):
178 178 f.parents(b'\x01' * 20)
179 179
180 180 with self.assertRaises(IndexError):
181 181 f.parentrevs(1)
182 182
183 183 self.assertEqual(f.rev(node), 0)
184 184
185 185 with self.assertRaises(error.LookupError):
186 186 f.rev(b'\x01' * 20)
187 187
188 188 self.assertEqual(f.node(0), node)
189 189
190 190 with self.assertRaises(IndexError):
191 191 f.node(1)
192 192
193 193 self.assertEqual(f.lookup(node), node)
194 194 self.assertEqual(f.lookup(0), node)
195 195 self.assertEqual(f.lookup(-1), nullid)
196 196 self.assertEqual(f.lookup(b'0'), node)
197 197 self.assertEqual(f.lookup(hex(node)), node)
198 198
199 199 with self.assertRaises(error.LookupError):
200 200 f.lookup(hex(node)[0:12])
201 201
202 202 with self.assertRaises(error.LookupError):
203 203 f.lookup(-2)
204 204
205 205 with self.assertRaises(error.LookupError):
206 206 f.lookup(b'-2')
207 207
208 208 with self.assertRaises(error.LookupError):
209 209 f.lookup(1)
210 210
211 211 with self.assertRaises(error.LookupError):
212 212 f.lookup(b'1')
213 213
214 214 self.assertEqual(f.linkrev(0), 0)
215 215
216 216 with self.assertRaises(IndexError):
217 217 f.linkrev(1)
218 218
219 219 self.assertFalse(f.iscensored(0))
220 220
221 221 with self.assertRaises(IndexError):
222 222 f.iscensored(1)
223 223
224 224 self.assertEqual(list(f.descendants([0])), [])
225 225
226 226 self.assertEqual(f.heads(), [node])
227 227 self.assertEqual(f.heads(node), [node])
228 228 self.assertEqual(f.heads(stop=[node]), [node])
229 229
230 230 with self.assertRaises(error.LookupError):
231 231 f.heads(stop=[b'\x01' * 20])
232 232
233 233 self.assertEqual(f.children(node), [])
234 234
235 235 def testmultiplerevisions(self):
236 236 fulltext0 = b'x' * 1024
237 237 fulltext1 = fulltext0 + b'y'
238 238 fulltext2 = b'y' + fulltext0 + b'z'
239 239
240 240 f = self._makefilefn()
241 241 with self._maketransactionfn() as tr:
242 242 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
243 243 node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
244 244 node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
245 245
246 246 self.assertEqual(len(f), 3)
247 247 self.assertEqual(list(f), [0, 1, 2])
248 248
249 249 gen = iter(f)
250 250 self.assertEqual(next(gen), 0)
251 251 self.assertEqual(next(gen), 1)
252 252 self.assertEqual(next(gen), 2)
253 253
254 254 with self.assertRaises(StopIteration):
255 255 next(gen)
256 256
257 257 self.assertEqual(list(f.revs()), [0, 1, 2])
258 258 self.assertEqual(list(f.revs(0)), [0, 1, 2])
259 259 self.assertEqual(list(f.revs(1)), [1, 2])
260 260 self.assertEqual(list(f.revs(2)), [2])
261 261 self.assertEqual(list(f.revs(3)), [])
262 262 self.assertEqual(list(f.revs(stop=1)), [0, 1])
263 263 self.assertEqual(list(f.revs(stop=2)), [0, 1, 2])
264 264 self.assertEqual(list(f.revs(stop=3)), [0, 1, 2])
265 265 self.assertEqual(list(f.revs(2, 0)), [2, 1, 0])
266 266 self.assertEqual(list(f.revs(2, 1)), [2, 1])
267 267 # TODO this is wrong
268 268 self.assertEqual(list(f.revs(3, 2)), [3, 2])
269 269
270 270 self.assertEqual(f.parents(node0), (nullid, nullid))
271 271 self.assertEqual(f.parents(node1), (node0, nullid))
272 272 self.assertEqual(f.parents(node2), (node1, nullid))
273 273
274 274 self.assertEqual(f.parentrevs(0), (nullrev, nullrev))
275 275 self.assertEqual(f.parentrevs(1), (0, nullrev))
276 276 self.assertEqual(f.parentrevs(2), (1, nullrev))
277 277
278 278 self.assertEqual(f.rev(node0), 0)
279 279 self.assertEqual(f.rev(node1), 1)
280 280 self.assertEqual(f.rev(node2), 2)
281 281
282 282 with self.assertRaises(error.LookupError):
283 283 f.rev(b'\x01' * 20)
284 284
285 285 self.assertEqual(f.node(0), node0)
286 286 self.assertEqual(f.node(1), node1)
287 287 self.assertEqual(f.node(2), node2)
288 288
289 289 with self.assertRaises(IndexError):
290 290 f.node(3)
291 291
292 292 self.assertEqual(f.lookup(node0), node0)
293 293 self.assertEqual(f.lookup(0), node0)
294 294 self.assertEqual(f.lookup(b'0'), node0)
295 295 self.assertEqual(f.lookup(hex(node0)), node0)
296 296
297 297 self.assertEqual(f.lookup(node1), node1)
298 298 self.assertEqual(f.lookup(1), node1)
299 299 self.assertEqual(f.lookup(b'1'), node1)
300 300 self.assertEqual(f.lookup(hex(node1)), node1)
301 301
302 302 self.assertEqual(f.linkrev(0), 0)
303 303 self.assertEqual(f.linkrev(1), 1)
304 304 self.assertEqual(f.linkrev(2), 3)
305 305
306 306 with self.assertRaises(IndexError):
307 307 f.linkrev(3)
308 308
309 309 self.assertFalse(f.iscensored(0))
310 310 self.assertFalse(f.iscensored(1))
311 311 self.assertFalse(f.iscensored(2))
312 312
313 313 with self.assertRaises(IndexError):
314 314 f.iscensored(3)
315 315
316 316 self.assertEqual(f.commonancestorsheads(node1, nullid), [])
317 317 self.assertEqual(f.commonancestorsheads(node1, node0), [node0])
318 318 self.assertEqual(f.commonancestorsheads(node1, node1), [node1])
319 319 self.assertEqual(f.commonancestorsheads(node0, node1), [node0])
320 320 self.assertEqual(f.commonancestorsheads(node1, node2), [node1])
321 321 self.assertEqual(f.commonancestorsheads(node2, node1), [node1])
322 322
323 323 self.assertEqual(list(f.descendants([0])), [1, 2])
324 324 self.assertEqual(list(f.descendants([1])), [2])
325 325 self.assertEqual(list(f.descendants([0, 1])), [1, 2])
326 326
327 327 self.assertEqual(f.heads(), [node2])
328 328 self.assertEqual(f.heads(node0), [node2])
329 329 self.assertEqual(f.heads(node1), [node2])
330 330 self.assertEqual(f.heads(node2), [node2])
331 331
332 332 # TODO this behavior seems wonky. Is it correct? If so, the
333 333 # docstring for heads() should be updated to reflect desired
334 334 # behavior.
335 335 self.assertEqual(f.heads(stop=[node1]), [node1, node2])
336 336 self.assertEqual(f.heads(stop=[node0]), [node0, node2])
337 337 self.assertEqual(f.heads(stop=[node1, node2]), [node1, node2])
338 338
339 339 with self.assertRaises(error.LookupError):
340 340 f.heads(stop=[b'\x01' * 20])
341 341
342 342 self.assertEqual(f.children(node0), [node1])
343 343 self.assertEqual(f.children(node1), [node2])
344 344 self.assertEqual(f.children(node2), [])
345 345
346 346 def testmultipleheads(self):
347 347 f = self._makefilefn()
348 348
349 349 with self._maketransactionfn() as tr:
350 350 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
351 351 node1 = f.add(b'1', None, tr, 1, node0, nullid)
352 352 node2 = f.add(b'2', None, tr, 2, node1, nullid)
353 353 node3 = f.add(b'3', None, tr, 3, node0, nullid)
354 354 node4 = f.add(b'4', None, tr, 4, node3, nullid)
355 355 node5 = f.add(b'5', None, tr, 5, node0, nullid)
356 356
357 357 self.assertEqual(len(f), 6)
358 358
359 359 self.assertEqual(list(f.descendants([0])), [1, 2, 3, 4, 5])
360 360 self.assertEqual(list(f.descendants([1])), [2])
361 361 self.assertEqual(list(f.descendants([2])), [])
362 362 self.assertEqual(list(f.descendants([3])), [4])
363 363 self.assertEqual(list(f.descendants([0, 1])), [1, 2, 3, 4, 5])
364 364 self.assertEqual(list(f.descendants([1, 3])), [2, 4])
365 365
366 366 self.assertEqual(f.heads(), [node2, node4, node5])
367 367 self.assertEqual(f.heads(node0), [node2, node4, node5])
368 368 self.assertEqual(f.heads(node1), [node2])
369 369 self.assertEqual(f.heads(node2), [node2])
370 370 self.assertEqual(f.heads(node3), [node4])
371 371 self.assertEqual(f.heads(node4), [node4])
372 372 self.assertEqual(f.heads(node5), [node5])
373 373
374 374 # TODO this seems wrong.
375 375 self.assertEqual(f.heads(stop=[node0]), [node0, node2, node4, node5])
376 376 self.assertEqual(f.heads(stop=[node1]), [node1, node2, node4, node5])
377 377
378 378 self.assertEqual(f.children(node0), [node1, node3, node5])
379 379 self.assertEqual(f.children(node1), [node2])
380 380 self.assertEqual(f.children(node2), [])
381 381 self.assertEqual(f.children(node3), [node4])
382 382 self.assertEqual(f.children(node4), [])
383 383 self.assertEqual(f.children(node5), [])
384 384
385 385 class ifiledatatests(basetestcase):
386 386 """Generic tests for the ifiledata interface.
387 387
388 388 All file storage backends for data should conform to the tests in this
389 389 class.
390 390
391 391 Use ``makeifiledatatests()`` to create an instance of this type.
392 392 """
393 393 def testempty(self):
394 394 f = self._makefilefn()
395 395
396 396 self.assertEqual(f.storageinfo(), {})
397 397 self.assertEqual(f.storageinfo(revisionscount=True, trackedsize=True),
398 398 {'revisionscount': 0, 'trackedsize': 0})
399 399
400 400 self.assertEqual(f.size(nullrev), 0)
401 401
402 402 for i in range(-5, 5):
403 403 if i == nullrev:
404 404 continue
405 405
406 406 with self.assertRaises(IndexError):
407 407 f.size(i)
408 408
409 409 self.assertEqual(f.revision(nullid), b'')
410 410 self.assertEqual(f.revision(nullid, raw=True), b'')
411 411
412 412 with self.assertRaises(error.LookupError):
413 413 f.revision(b'\x01' * 20)
414 414
415 415 self.assertEqual(f.read(nullid), b'')
416 416
417 417 with self.assertRaises(error.LookupError):
418 418 f.read(b'\x01' * 20)
419 419
420 420 self.assertFalse(f.renamed(nullid))
421 421
422 422 with self.assertRaises(error.LookupError):
423 423 f.read(b'\x01' * 20)
424 424
425 425 self.assertTrue(f.cmp(nullid, b''))
426 426 self.assertTrue(f.cmp(nullid, b'foo'))
427 427
428 428 with self.assertRaises(error.LookupError):
429 429 f.cmp(b'\x01' * 20, b'irrelevant')
430 430
431 431 # Emitting empty list is an empty generator.
432 432 gen = f.emitrevisions([])
433 433 with self.assertRaises(StopIteration):
434 434 next(gen)
435 435
436 436 # Emitting null node yields nothing.
437 437 gen = f.emitrevisions([nullid])
438 438 with self.assertRaises(StopIteration):
439 439 next(gen)
440 440
441 441 # Requesting unknown node fails.
442 442 with self.assertRaises(error.LookupError):
443 443 list(f.emitrevisions([b'\x01' * 20]))
444 444
445 445 def testsinglerevision(self):
446 446 fulltext = b'initial'
447 447
448 448 f = self._makefilefn()
449 449 with self._maketransactionfn() as tr:
450 450 node = f.add(fulltext, None, tr, 0, nullid, nullid)
451 451
452 452 self.assertEqual(f.storageinfo(), {})
453 453 self.assertEqual(f.storageinfo(revisionscount=True, trackedsize=True),
454 454 {'revisionscount': 1, 'trackedsize': len(fulltext)})
455 455
456 456 self.assertEqual(f.size(0), len(fulltext))
457 457
458 458 with self.assertRaises(IndexError):
459 459 f.size(1)
460 460
461 461 self.assertEqual(f.revision(node), fulltext)
462 462 self.assertEqual(f.revision(node, raw=True), fulltext)
463 463
464 464 self.assertEqual(f.read(node), fulltext)
465 465
466 466 self.assertFalse(f.renamed(node))
467 467
468 468 self.assertFalse(f.cmp(node, fulltext))
469 469 self.assertTrue(f.cmp(node, fulltext + b'extra'))
470 470
471 471 # Emitting a single revision works.
472 472 gen = f.emitrevisions([node])
473 473 rev = next(gen)
474 474
475 475 self.assertEqual(rev.node, node)
476 476 self.assertEqual(rev.p1node, nullid)
477 477 self.assertEqual(rev.p2node, nullid)
478 478 self.assertIsNone(rev.linknode)
479 479 self.assertEqual(rev.basenode, nullid)
480 480 self.assertIsNone(rev.baserevisionsize)
481 481 self.assertIsNone(rev.revision)
482 482 self.assertIsNone(rev.delta)
483 483
484 484 with self.assertRaises(StopIteration):
485 485 next(gen)
486 486
487 487 # Requesting revision data works.
488 488 gen = f.emitrevisions([node], revisiondata=True)
489 489 rev = next(gen)
490 490
491 491 self.assertEqual(rev.node, node)
492 492 self.assertEqual(rev.p1node, nullid)
493 493 self.assertEqual(rev.p2node, nullid)
494 494 self.assertIsNone(rev.linknode)
495 495 self.assertEqual(rev.basenode, nullid)
496 496 self.assertIsNone(rev.baserevisionsize)
497 497 self.assertEqual(rev.revision, fulltext)
498 498 self.assertIsNone(rev.delta)
499 499
500 500 with self.assertRaises(StopIteration):
501 501 next(gen)
502 502
503 503 # Emitting an unknown node after a known revision results in error.
504 504 with self.assertRaises(error.LookupError):
505 505 list(f.emitrevisions([node, b'\x01' * 20]))
506 506
507 507 def testmultiplerevisions(self):
508 508 fulltext0 = b'x' * 1024
509 509 fulltext1 = fulltext0 + b'y'
510 510 fulltext2 = b'y' + fulltext0 + b'z'
511 511
512 512 f = self._makefilefn()
513 513 with self._maketransactionfn() as tr:
514 514 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
515 515 node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
516 516 node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
517 517
518 518 self.assertEqual(f.storageinfo(), {})
519 519 self.assertEqual(
520 520 f.storageinfo(revisionscount=True, trackedsize=True),
521 521 {
522 522 'revisionscount': 3,
523 523 'trackedsize': len(fulltext0) + len(fulltext1) + len(fulltext2),
524 524 })
525 525
526 526 self.assertEqual(f.size(0), len(fulltext0))
527 527 self.assertEqual(f.size(1), len(fulltext1))
528 528 self.assertEqual(f.size(2), len(fulltext2))
529 529
530 530 with self.assertRaises(IndexError):
531 531 f.size(3)
532 532
533 533 self.assertEqual(f.revision(node0), fulltext0)
534 534 self.assertEqual(f.revision(node0, raw=True), fulltext0)
535 535 self.assertEqual(f.revision(node1), fulltext1)
536 536 self.assertEqual(f.revision(node1, raw=True), fulltext1)
537 537 self.assertEqual(f.revision(node2), fulltext2)
538 538 self.assertEqual(f.revision(node2, raw=True), fulltext2)
539 539
540 540 with self.assertRaises(error.LookupError):
541 541 f.revision(b'\x01' * 20)
542 542
543 543 self.assertEqual(f.read(node0), fulltext0)
544 544 self.assertEqual(f.read(node1), fulltext1)
545 545 self.assertEqual(f.read(node2), fulltext2)
546 546
547 547 with self.assertRaises(error.LookupError):
548 548 f.read(b'\x01' * 20)
549 549
550 550 self.assertFalse(f.renamed(node0))
551 551 self.assertFalse(f.renamed(node1))
552 552 self.assertFalse(f.renamed(node2))
553 553
554 554 with self.assertRaises(error.LookupError):
555 555 f.renamed(b'\x01' * 20)
556 556
557 557 self.assertFalse(f.cmp(node0, fulltext0))
558 558 self.assertFalse(f.cmp(node1, fulltext1))
559 559 self.assertFalse(f.cmp(node2, fulltext2))
560 560
561 561 self.assertTrue(f.cmp(node1, fulltext0))
562 562 self.assertTrue(f.cmp(node2, fulltext1))
563 563
564 564 with self.assertRaises(error.LookupError):
565 565 f.cmp(b'\x01' * 20, b'irrelevant')
566 566
567 567 # Nodes should be emitted in order.
568 568 gen = f.emitrevisions([node0, node1, node2], revisiondata=True)
569 569
570 570 rev = next(gen)
571 571
572 572 self.assertEqual(rev.node, node0)
573 573 self.assertEqual(rev.p1node, nullid)
574 574 self.assertEqual(rev.p2node, nullid)
575 575 self.assertIsNone(rev.linknode)
576 576 self.assertEqual(rev.basenode, nullid)
577 577 self.assertIsNone(rev.baserevisionsize)
578 578 self.assertEqual(rev.revision, fulltext0)
579 579 self.assertIsNone(rev.delta)
580 580
581 581 rev = next(gen)
582 582
583 583 self.assertEqual(rev.node, node1)
584 584 self.assertEqual(rev.p1node, node0)
585 585 self.assertEqual(rev.p2node, nullid)
586 586 self.assertIsNone(rev.linknode)
587 587 self.assertEqual(rev.basenode, node0)
588 588 self.assertIsNone(rev.baserevisionsize)
589 589 self.assertIsNone(rev.revision)
590 590 self.assertEqual(rev.delta,
591 591 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' +
592 592 fulltext1)
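# A hedged aside on the raw bytes asserted above: mdiff deltas are
# chunks of three big-endian 32-bit integers (start, end, length)
# followed by ``length`` bytes that replace base[start:end].
# ``_decodedeltaheader`` is illustrative, not part of the tested
# interface.
import struct

def _decodedeltaheader(delta):
    # First 12 bytes of a chunk: replace base[start:end] with the
    # ``length`` bytes that follow.
    return struct.unpack('>lll', delta[:12])

# node1's delta replaces base[0:1024] (all of fulltext0) with the
# 1025 bytes of fulltext1.
self.assertEqual(_decodedeltaheader(rev.delta), (0, 1024, 1025))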
593 593
594 594 rev = next(gen)
595 595
596 596 self.assertEqual(rev.node, node2)
597 597 self.assertEqual(rev.p1node, node1)
598 598 self.assertEqual(rev.p2node, nullid)
599 599 self.assertIsNone(rev.linknode)
600 600 self.assertEqual(rev.basenode, node1)
601 601 self.assertIsNone(rev.baserevisionsize)
602 602 self.assertIsNone(rev.revision)
603 603 self.assertEqual(rev.delta,
604 604 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' +
605 605 fulltext2)
606 606
607 607 with self.assertRaises(StopIteration):
608 608 next(gen)
609 609
610 610 # Request not in DAG order is reordered to be in DAG order.
611 611 gen = f.emitrevisions([node2, node1, node0], revisiondata=True)
612 612
613 613 rev = next(gen)
614 614
615 615 self.assertEqual(rev.node, node0)
616 616 self.assertEqual(rev.p1node, nullid)
617 617 self.assertEqual(rev.p2node, nullid)
618 618 self.assertIsNone(rev.linknode)
619 619 self.assertEqual(rev.basenode, nullid)
620 620 self.assertIsNone(rev.baserevisionsize)
621 621 self.assertEqual(rev.revision, fulltext0)
622 622 self.assertIsNone(rev.delta)
623 623
624 624 rev = next(gen)
625 625
626 626 self.assertEqual(rev.node, node1)
627 627 self.assertEqual(rev.p1node, node0)
628 628 self.assertEqual(rev.p2node, nullid)
629 629 self.assertIsNone(rev.linknode)
630 630 self.assertEqual(rev.basenode, node0)
631 631 self.assertIsNone(rev.baserevisionsize)
632 632 self.assertIsNone(rev.revision)
633 633 self.assertEqual(rev.delta,
634 634 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' +
635 635 fulltext1)
636 636
637 637 rev = next(gen)
638 638
639 639 self.assertEqual(rev.node, node2)
640 640 self.assertEqual(rev.p1node, node1)
641 641 self.assertEqual(rev.p2node, nullid)
642 642 self.assertIsNone(rev.linknode)
643 643 self.assertEqual(rev.basenode, node1)
644 644 self.assertIsNone(rev.baserevisionsize)
645 645 self.assertIsNone(rev.revision)
646 646 self.assertEqual(rev.delta,
647 647 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' +
648 648 fulltext2)
649 649
650 650 with self.assertRaises(StopIteration):
651 651 next(gen)
652 652
653 653 # Unrecognized nodesorder value raises ProgrammingError.
654 654 with self.assertRaises(error.ProgrammingError):
655 655 list(f.emitrevisions([], nodesorder='bad'))
656 656
657 657 # nodesorder=storage is recognized. But we can't test it thoroughly
658 658 # because behavior is storage-dependent.
659 659 res = list(f.emitrevisions([node2, node1, node0],
660 660 nodesorder='storage'))
661 661 self.assertEqual(len(res), 3)
662 662 self.assertEqual({o.node for o in res}, {node0, node1, node2})
663 663
664 664 # nodesorder=nodes forces the order.
665 665 gen = f.emitrevisions([node2, node0], nodesorder='nodes',
666 666 revisiondata=True)
667 667
668 668 rev = next(gen)
669 669 self.assertEqual(rev.node, node2)
670 670 self.assertEqual(rev.p1node, node1)
671 671 self.assertEqual(rev.p2node, nullid)
672 672 self.assertEqual(rev.basenode, nullid)
673 673 self.assertIsNone(rev.baserevisionsize)
674 674 self.assertEqual(rev.revision, fulltext2)
675 675 self.assertIsNone(rev.delta)
676 676
677 677 rev = next(gen)
678 678 self.assertEqual(rev.node, node0)
679 679 self.assertEqual(rev.p1node, nullid)
680 680 self.assertEqual(rev.p2node, nullid)
681 681 # Delta behavior is storage-dependent, so we can't easily test it.
682 682
683 683 with self.assertRaises(StopIteration):
684 684 next(gen)
685 685
686 686 # assumehaveparentrevisions=False (the default) won't send a delta for
687 687 # the first revision.
688 688 gen = f.emitrevisions({node2, node1}, revisiondata=True)
689 689
690 690 rev = next(gen)
691 691 self.assertEqual(rev.node, node1)
692 692 self.assertEqual(rev.p1node, node0)
693 693 self.assertEqual(rev.p2node, nullid)
694 694 self.assertEqual(rev.basenode, nullid)
695 695 self.assertIsNone(rev.baserevisionsize)
696 696 self.assertEqual(rev.revision, fulltext1)
697 697 self.assertIsNone(rev.delta)
698 698
699 699 rev = next(gen)
700 700 self.assertEqual(rev.node, node2)
701 701 self.assertEqual(rev.p1node, node1)
702 702 self.assertEqual(rev.p2node, nullid)
703 703 self.assertEqual(rev.basenode, node1)
704 704 self.assertIsNone(rev.baserevisionsize)
705 705 self.assertIsNone(rev.revision)
706 706 self.assertEqual(rev.delta,
707 707 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' +
708 708 fulltext2)
709 709
710 710 with self.assertRaises(StopIteration):
711 711 next(gen)
712 712
713 713 # assumehaveparentrevisions=True allows delta against initial revision.
714 714 gen = f.emitrevisions([node2, node1],
715 715 revisiondata=True, assumehaveparentrevisions=True)
716 716
717 717 rev = next(gen)
718 718 self.assertEqual(rev.node, node1)
719 719 self.assertEqual(rev.p1node, node0)
720 720 self.assertEqual(rev.p2node, nullid)
721 721 self.assertEqual(rev.basenode, node0)
722 722 self.assertIsNone(rev.baserevisionsize)
723 723 self.assertIsNone(rev.revision)
724 724 self.assertEqual(rev.delta,
725 725 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' +
726 726 fulltext1)
727 727
728 728 # deltaprevious=True forces a delta against the previous revision.
729 729 # Special case for initial revision.
730 730 gen = f.emitrevisions([node0], revisiondata=True, deltaprevious=True)
731 731
732 732 rev = next(gen)
733 733 self.assertEqual(rev.node, node0)
734 734 self.assertEqual(rev.p1node, nullid)
735 735 self.assertEqual(rev.p2node, nullid)
736 736 self.assertEqual(rev.basenode, nullid)
737 737 self.assertIsNone(rev.baserevisionsize)
738 738 self.assertIsNone(rev.revision)
739 739 self.assertEqual(rev.delta,
740 740 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' +
741 741 fulltext0)
742 742
743 743 with self.assertRaises(StopIteration):
744 744 next(gen)
745 745
746 746 gen = f.emitrevisions([node0, node2], revisiondata=True,
747 747 deltaprevious=True)
748 748
749 749 rev = next(gen)
750 750 self.assertEqual(rev.node, node0)
751 751 self.assertEqual(rev.p1node, nullid)
752 752 self.assertEqual(rev.p2node, nullid)
753 753 self.assertEqual(rev.basenode, nullid)
754 754 self.assertIsNone(rev.baserevisionsize)
755 755 self.assertIsNone(rev.revision)
756 756 self.assertEqual(rev.delta,
757 757 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' +
758 758 fulltext0)
759 759
760 760 rev = next(gen)
761 761 self.assertEqual(rev.node, node2)
762 762 self.assertEqual(rev.p1node, node1)
763 763 self.assertEqual(rev.p2node, nullid)
764 764 self.assertEqual(rev.basenode, node0)
765 765
766 766 with self.assertRaises(StopIteration):
767 767 next(gen)
768 768
769 769 def testrenamed(self):
770 770 fulltext0 = b'foo'
771 771 fulltext1 = b'bar'
772 772 fulltext2 = b'baz'
773 773
774 774 meta1 = {
775 775 b'copy': b'source0',
776 776 b'copyrev': b'a' * 40,
777 777 }
778 778
779 779 meta2 = {
780 780 b'copy': b'source1',
781 781 b'copyrev': b'b' * 40,
782 782 }
783 783
784 784 stored1 = b''.join([
785 785 b'\x01\ncopy: source0\n',
786 786 b'copyrev: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\x01\n',
787 787 fulltext1,
788 788 ])
789 789
790 790 stored2 = b''.join([
791 791 b'\x01\ncopy: source1\n',
792 792 b'copyrev: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\x01\n',
793 793 fulltext2,
794 794 ])
795 795
796 796 f = self._makefilefn()
797 797 with self._maketransactionfn() as tr:
798 798 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
799 799 node1 = f.add(fulltext1, meta1, tr, 1, node0, nullid)
800 800 node2 = f.add(fulltext2, meta2, tr, 2, nullid, nullid)
801 801
802 802 # Metadata header isn't recognized when parent isn't nullid.
803 803 self.assertEqual(f.size(1), len(stored1))
804 804 self.assertEqual(f.size(2), len(fulltext2))
805 805
806 806 self.assertEqual(f.revision(node1), stored1)
807 807 self.assertEqual(f.revision(node1, raw=True), stored1)
808 808 self.assertEqual(f.revision(node2), stored2)
809 809 self.assertEqual(f.revision(node2, raw=True), stored2)
810 810
811 811 self.assertEqual(f.read(node1), fulltext1)
812 812 self.assertEqual(f.read(node2), fulltext2)
813 813
814 814 # Returns False when first parent is set.
815 815 self.assertFalse(f.renamed(node1))
816 816 self.assertEqual(f.renamed(node2), (b'source1', b'\xbb' * 20))
817 817
818 818 self.assertTrue(f.cmp(node1, fulltext1))
819 819 self.assertTrue(f.cmp(node1, stored1))
820 820 self.assertFalse(f.cmp(node2, fulltext2))
821 821 self.assertTrue(f.cmp(node2, stored2))
822 822
823 823 def testmetadataprefix(self):
824 824 # Content with metadata prefix has extra prefix inserted in storage.
825 825 fulltext0 = b'\x01\nfoo'
826 826 stored0 = b'\x01\n\x01\n\x01\nfoo'
827 827
828 828 fulltext1 = b'\x01\nbar'
829 829 meta1 = {
830 830 b'copy': b'source0',
831 831 b'copyrev': b'b' * 40,
832 832 }
833 833 stored1 = b''.join([
834 834 b'\x01\ncopy: source0\n',
835 835 b'copyrev: %s\n' % (b'b' * 40),
836 836 b'\x01\n\x01\nbar',
837 837 ])
838 838
839 839 f = self._makefilefn()
840 840 with self._maketransactionfn() as tr:
841 841 node0 = f.add(fulltext0, {}, tr, 0, nullid, nullid)
842 842 node1 = f.add(fulltext1, meta1, tr, 1, nullid, nullid)
843 843
844 844 # TODO this is buggy.
845 845 self.assertEqual(f.size(0), len(fulltext0) + 4)
846 846
847 847 self.assertEqual(f.size(1), len(fulltext1))
848 848
849 849 self.assertEqual(f.revision(node0), stored0)
850 850 self.assertEqual(f.revision(node0, raw=True), stored0)
851 851
852 852 self.assertEqual(f.revision(node1), stored1)
853 853 self.assertEqual(f.revision(node1, raw=True), stored1)
854 854
855 855 self.assertEqual(f.read(node0), fulltext0)
856 856 self.assertEqual(f.read(node1), fulltext1)
857 857
858 858 self.assertFalse(f.cmp(node0, fulltext0))
859 859 self.assertTrue(f.cmp(node0, stored0))
860 860
861 861 self.assertFalse(f.cmp(node1, fulltext1))
862 862 self.assertTrue(f.cmp(node1, stored1))
863 863
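# A hedged sketch of the framing convention testmetadataprefix
# exercises (the real helpers are storageutil.packmeta and its
# parsing counterpart): metadata sits between two \x01\n markers, and
# a fulltext that itself starts with \x01\n gets an empty header
# prepended so the two cases stay distinguishable. Illustrative only:
#
#     def packmeta(meta, text):
#         metatext = b''.join(b'%s: %s\n' % (k, meta[k])
#                             for k in sorted(meta))
#         return b'\x01\n%s\x01\n%s' % (metatext, text)
#
#     packmeta({}, b'\x01\nfoo')  # -> b'\x01\n\x01\n\x01\nfoo'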
864 864 def testbadnoderead(self):
865 865 f = self._makefilefn()
866 866
867 867 fulltext0 = b'foo\n' * 30
868 868 fulltext1 = fulltext0 + b'bar\n'
869 869
870 870 with self._maketransactionfn() as tr:
871 871 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
872 872 node1 = b'\xaa' * 20
873 873
874 874 self._addrawrevisionfn(f, tr, node1, node0, nullid, 1,
875 875 rawtext=fulltext1)
876 876
877 877 self.assertEqual(len(f), 2)
878 878 self.assertEqual(f.parents(node1), (node0, nullid))
879 879
880 880 # revision() raises since it performs hash verification.
881 881 with self.assertRaises(error.StorageError):
882 882 f.revision(node1)
883 883
884 # revision(raw=True) still verifies hashes.
885 # TODO this is buggy because of cache interaction.
886 self.assertEqual(f.revision(node1, raw=True), fulltext1)
884 # raw=True still verifies because there are no special storage
885 # settings.
886 with self.assertRaises(error.StorageError):
887 f.revision(node1, raw=True)
887 888
888 889 # read() behaves like revision().
889 # TODO this is buggy because of cache interaction.
890 f.read(node1)
890 with self.assertRaises(error.StorageError):
891 f.read(node1)
891 892
892 893 # We can't test renamed() here because some backends may not require
893 894 # reading/validating the fulltext to return rename metadata.
894 895
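# The assertions above encode the behavior this change fixes: a
# fulltext that fails hash verification must not linger in the
# revision cache, or a second revision()/read() call would return the
# cached bad text instead of raising again. A hedged sketch of the
# pattern (cache attribute and message are illustrative):
#
#     def checkhash(self, text, node, p1, p2):
#         if node != storageutil.hashrevisionsha1(text, p1, p2):
#             self._cache = None  # drop cached fulltext first
#             raise error.StorageError(b'integrity check failed')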
895 896 def testbadnoderevisionraw(self):
896 897 # Like above except we test revision(raw=True) first to isolate
897 898 # revision caching behavior.
898 899 f = self._makefilefn()
899 900
900 901 fulltext0 = b'foo\n' * 30
901 902 fulltext1 = fulltext0 + b'bar\n'
902 903
903 904 with self._maketransactionfn() as tr:
904 905 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
905 906 node1 = b'\xaa' * 20
906 907
907 908 self._addrawrevisionfn(f, tr, node1, node0, nullid, 1,
908 909 rawtext=fulltext1)
909 910
910 911 with self.assertRaises(error.StorageError):
911 912 f.revision(node1, raw=True)
912 913
913 914 with self.assertRaises(error.StorageError):
914 915 f.revision(node1, raw=True)
915 916
916 917 def testbadnoderevisionread(self):
917 918 # Like above except we test read() first to isolate revision caching
918 919 # behavior.
919 920 f = self._makefilefn()
920 921
921 922 fulltext0 = b'foo\n' * 30
922 923 fulltext1 = fulltext0 + b'bar\n'
923 924
924 925 with self._maketransactionfn() as tr:
925 926 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
926 927 node1 = b'\xaa' * 20
927 928
928 929 self._addrawrevisionfn(f, tr, node1, node0, nullid, 1,
929 930 rawtext=fulltext1)
930 931
931 932 with self.assertRaises(error.StorageError):
932 933 f.read(node1)
933 934
934 # TODO this should raise error.StorageError.
935 f.read(node1)
935 with self.assertRaises(error.StorageError):
936 f.read(node1)
936 937
937 938 def testbadnodedelta(self):
938 939 f = self._makefilefn()
939 940
940 941 fulltext0 = b'foo\n' * 31
941 942 fulltext1 = fulltext0 + b'bar\n'
942 943 fulltext2 = fulltext1 + b'baz\n'
943 944
944 945 with self._maketransactionfn() as tr:
945 946 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
946 947 node1 = b'\xaa' * 20
947 948
948 949 self._addrawrevisionfn(f, tr, node1, node0, nullid, 1,
949 950 rawtext=fulltext1)
950 951
951 952 with self.assertRaises(error.StorageError):
952 953 f.read(node1)
953 954
954 955 diff = mdiff.textdiff(fulltext1, fulltext2)
955 956 node2 = storageutil.hashrevisionsha1(fulltext2, node1, nullid)
956 957 deltas = [(node2, node1, nullid, b'\x01' * 20, node1, diff, 0)]
957 958
958 959 # This /might/ fail on some backends.
959 960 with self._maketransactionfn() as tr:
960 961 f.addgroup(deltas, lambda x: 0, tr)
961 962
962 963 self.assertEqual(len(f), 3)
963 964
964 965 # Assuming a delta is stored, we shouldn't need to validate node1 in
965 966 # order to retrieve node2.
966 967 self.assertEqual(f.read(node2), fulltext2)
967 968
968 969 def testcensored(self):
969 970 f = self._makefilefn()
970 971
971 972 stored1 = storageutil.packmeta({
972 973 b'censored': b'tombstone',
973 974 }, b'')
974 975
975 976 with self._maketransactionfn() as tr:
976 977 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
977 978
978 979 # The node value doesn't matter since we can't verify it.
979 980 node1 = b'\xbb' * 20
980 981
981 982 self._addrawrevisionfn(f, tr, node1, node0, nullid, 1, stored1,
982 983 censored=True)
983 984
984 985 self.assertTrue(f.iscensored(1))
985 986
986 987 with self.assertRaises(error.CensoredNodeError):
987 988 f.revision(1)
988 989
989 self.assertEqual(f.revision(1, raw=True), stored1)
990 with self.assertRaises(error.CensoredNodeError):
991 f.revision(1, raw=True)
990 992
991 993 with self.assertRaises(error.CensoredNodeError):
992 994 f.read(1)
993 995
994 996 def testcensoredrawrevision(self):
995 997 # Like above, except we do the revision(raw=True) request first to
996 998 # isolate revision caching behavior.
997 999
998 1000 f = self._makefilefn()
999 1001
1000 1002 stored1 = storageutil.packmeta({
1001 1003 b'censored': b'tombstone',
1002 1004 }, b'')
1003 1005
1004 1006 with self._maketransactionfn() as tr:
1005 1007 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1006 1008
1007 1009 # The node value doesn't matter since we can't verify it.
1008 1010 node1 = b'\xbb' * 20
1009 1011
1010 1012 self._addrawrevisionfn(f, tr, node1, node0, nullid, 1, stored1,
1011 1013 censored=True)
1012 1014
1013 1015 with self.assertRaises(error.CensoredNodeError):
1014 1016 f.revision(1, raw=True)
1015 1017
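# A hedged sketch of how a censoring-aware store can recognize a
# tombstone like the one written above: the stored text is
# metadata-only, packed by storageutil.packmeta with a b'censored'
# key. The helper name is illustrative, not the tested interface, and
# it assumes well-formed metadata framing.
def _iscensoredtext(text):
    if not text.startswith(b'\x01\n'):
        return False
    end = text.index(b'\x01\n', 2)
    meta = dict(l.split(b': ', 1) for l in text[2:end].splitlines())
    return b'censored' in meta

assert _iscensoredtext(storageutil.packmeta({b'censored': b'x'}, b''))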
1016 1018 class ifilemutationtests(basetestcase):
1017 1019 """Generic tests for the ifilemutation interface.
1018 1020
1019 1021 All file storage backends that support writing should conform to this
1020 1022 interface.
1021 1023
1022 1024 Use ``makeifilemutationtests()`` to create an instance of this type.
1023 1025 """
1024 1026 def testaddnoop(self):
1025 1027 f = self._makefilefn()
1026 1028 with self._maketransactionfn() as tr:
1027 1029 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1028 1030 node1 = f.add(b'foo', None, tr, 0, nullid, nullid)
1029 1031 # Varying by linkrev shouldn't impact hash.
1030 1032 node2 = f.add(b'foo', None, tr, 1, nullid, nullid)
1031 1033
1032 1034 self.assertEqual(node1, node0)
1033 1035 self.assertEqual(node2, node0)
1034 1036 self.assertEqual(len(f), 1)
1035 1037
1036 1038 def testaddrevisionbadnode(self):
1037 1039 f = self._makefilefn()
1038 1040 with self._maketransactionfn() as tr:
1039 1041 # Adding a revision with bad node value fails.
1040 1042 with self.assertRaises(error.StorageError):
1041 1043 f.addrevision(b'foo', tr, 0, nullid, nullid, node=b'\x01' * 20)
1042 1044
1043 1045 def testaddrevisionunknownflag(self):
1044 1046 f = self._makefilefn()
1045 1047 with self._maketransactionfn() as tr:
1046 1048 for i in range(15, 0, -1):
1047 1049 if (1 << i) & ~repository.REVISION_FLAGS_KNOWN:
1048 1050 flags = 1 << i
1049 1051 break
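# (The loop picks the highest bit among bits 1-15 of the flags field
# that is not in REVISION_FLAGS_KNOWN, so the addrevision() below is
# guaranteed to pass an unsupported flag.)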
1050 1052
1051 1053 with self.assertRaises(error.StorageError):
1052 1054 f.addrevision(b'foo', tr, 0, nullid, nullid, flags=flags)
1053 1055
1054 1056 def testaddgroupsimple(self):
1055 1057 f = self._makefilefn()
1056 1058
1057 1059 callbackargs = []
1058 1060 def cb(*args, **kwargs):
1059 1061 callbackargs.append((args, kwargs))
1060 1062
1061 1063 def linkmapper(node):
1062 1064 return 0
1063 1065
1064 1066 with self._maketransactionfn() as tr:
1065 1067 nodes = f.addgroup([], None, tr, addrevisioncb=cb)
1066 1068
1067 1069 self.assertEqual(nodes, [])
1068 1070 self.assertEqual(callbackargs, [])
1069 1071 self.assertEqual(len(f), 0)
1070 1072
1071 1073 fulltext0 = b'foo'
1072 1074 delta0 = mdiff.trivialdiffheader(len(fulltext0)) + fulltext0
1073 1075
1074 1076 deltas = [
1075 1077 (b'\x01' * 20, nullid, nullid, nullid, nullid, delta0, 0),
1076 1078 ]
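# Each tuple is (node, p1, p2, linknode, deltabase, delta, flags);
# with deltabase == nullid and a trivialdiffheader framing, the
# "delta" carries a fulltext. (Field order inferred from the calls in
# this file.)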
1077 1079
1078 1080 with self._maketransactionfn() as tr:
1079 1081 with self.assertRaises(error.StorageError):
1080 1082 f.addgroup(deltas, linkmapper, tr, addrevisioncb=cb)
1081 1083
1082 1084 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
1083 1085
1084 1086 f = self._makefilefn()
1085 1087
1086 1088 deltas = [
1087 1089 (node0, nullid, nullid, nullid, nullid, delta0, 0),
1088 1090 ]
1089 1091
1090 1092 with self._maketransactionfn() as tr:
1091 1093 nodes = f.addgroup(deltas, linkmapper, tr, addrevisioncb=cb)
1092 1094
1093 1095 self.assertEqual(nodes, [
1094 1096 b'\x49\xd8\xcb\xb1\x5c\xe2\x57\x92\x04\x47'
1095 1097 b'\x00\x6b\x46\x97\x8b\x7a\xf9\x80\xa9\x79'])
1096 1098
1097 1099 self.assertEqual(len(callbackargs), 1)
1098 1100 self.assertEqual(callbackargs[0][0][1], nodes[0])
1099 1101
1100 1102 self.assertEqual(list(f.revs()), [0])
1101 1103 self.assertEqual(f.rev(nodes[0]), 0)
1102 1104 self.assertEqual(f.node(0), nodes[0])
1103 1105
1104 1106 def testaddgroupmultiple(self):
1105 1107 f = self._makefilefn()
1106 1108
1107 1109 fulltexts = [
1108 1110 b'foo',
1109 1111 b'bar',
1110 1112 b'x' * 1024,
1111 1113 ]
1112 1114
1113 1115 nodes = []
1114 1116 with self._maketransactionfn() as tr:
1115 1117 for fulltext in fulltexts:
1116 1118 nodes.append(f.add(fulltext, None, tr, 0, nullid, nullid))
1117 1119
1118 1120 f = self._makefilefn()
1119 1121 deltas = []
1120 1122 for i, fulltext in enumerate(fulltexts):
1121 1123 delta = mdiff.trivialdiffheader(len(fulltext)) + fulltext
1122 1124
1123 1125 deltas.append((nodes[i], nullid, nullid, nullid, nullid, delta, 0))
1124 1126
1125 1127 with self._maketransactionfn() as tr:
1126 1128 self.assertEqual(f.addgroup(deltas, lambda x: 0, tr), nodes)
1127 1129
1128 1130 self.assertEqual(len(f), len(deltas))
1129 1131 self.assertEqual(list(f.revs()), [0, 1, 2])
1130 1132 self.assertEqual(f.rev(nodes[0]), 0)
1131 1133 self.assertEqual(f.rev(nodes[1]), 1)
1132 1134 self.assertEqual(f.rev(nodes[2]), 2)
1133 1135 self.assertEqual(f.node(0), nodes[0])
1134 1136 self.assertEqual(f.node(1), nodes[1])
1135 1137 self.assertEqual(f.node(2), nodes[2])
1136 1138
1137 1139 def testdeltaagainstcensored(self):
1138 1140 # Attempt to apply a delta made against a censored revision.
1139 1141 f = self._makefilefn()
1140 1142
1141 1143 stored1 = storageutil.packmeta({
1142 1144 b'censored': b'tombstone',
1143 1145 }, b'')
1144 1146
1145 1147 with self._maketransactionfn() as tr:
1146 1148 node0 = f.add(b'foo\n' * 30, None, tr, 0, nullid, nullid)
1147 1149
1148 1150 # The node value doesn't matter since we can't verify it.
1149 1151 node1 = b'\xbb' * 20
1150 1152
1151 1153 self._addrawrevisionfn(f, tr, node1, node0, nullid, 1, stored1,
1152 1154 censored=True)
1153 1155
1154 1156 delta = mdiff.textdiff(b'bar\n' * 30, (b'bar\n' * 30) + b'baz\n')
1155 1157 deltas = [(b'\xcc' * 20, node1, nullid, b'\x01' * 20, node1, delta, 0)]
1156 1158
1157 1159 with self._maketransactionfn() as tr:
1158 1160 with self.assertRaises(error.CensoredBaseError):
1159 1161 f.addgroup(deltas, lambda x: 0, tr)
1160 1162
1161 1163 def testcensorrevisionbasic(self):
1162 1164 f = self._makefilefn()
1163 1165
1164 1166 with self._maketransactionfn() as tr:
1165 1167 node0 = f.add(b'foo\n' * 30, None, tr, 0, nullid, nullid)
1166 1168 node1 = f.add(b'foo\n' * 31, None, tr, 1, node0, nullid)
1167 1169 node2 = f.add(b'foo\n' * 32, None, tr, 2, node1, nullid)
1168 1170
1169 1171 with self._maketransactionfn() as tr:
1170 1172 f.censorrevision(tr, node1)
1171 1173
1172 1174 self.assertEqual(len(f), 3)
1173 1175 self.assertEqual(list(f.revs()), [0, 1, 2])
1174 1176
1175 1177 self.assertEqual(f.read(node0), b'foo\n' * 30)
1176 1178
1177 1179 # TODO revlog can't resolve revision after censor. Probably due to a
1178 1180 # cache on the revlog instance.
1179 1181 with self.assertRaises(error.StorageError):
1180 1182 self.assertEqual(f.read(node2), b'foo\n' * 32)
1181 1183
1182 1184 # TODO should raise CensoredNodeError, but fallout from above prevents.
1183 1185 with self.assertRaises(error.StorageError):
1184 1186 f.read(node1)
1185 1187
1186 1188 def testgetstrippointnoparents(self):
1187 1189 # N revisions where none have parents.
1188 1190 f = self._makefilefn()
1189 1191
1190 1192 with self._maketransactionfn() as tr:
1191 1193 for rev in range(10):
1192 1194 f.add(b'%d' % rev, None, tr, rev, nullid, nullid)
1193 1195
1194 1196 for rev in range(10):
1195 1197 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1196 1198
1197 1199 def testgetstrippointlinear(self):
1198 1200 # N revisions in a linear chain.
1199 1201 f = self._makefilefn()
1200 1202
1201 1203 with self._maketransactionfn() as tr:
1202 1204 p1 = nullid
1203 1205
1204 1206 for rev in range(10):
1205 1207 f.add(b'%d' % rev, None, tr, rev, p1, nullid)
1206 1208
1207 1209 for rev in range(10):
1208 1210 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1209 1211
1210 1212 def testgetstrippointmultipleheads(self):
1211 1213 f = self._makefilefn()
1212 1214
1213 1215 with self._maketransactionfn() as tr:
1214 1216 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
1215 1217 node1 = f.add(b'1', None, tr, 1, node0, nullid)
1216 1218 f.add(b'2', None, tr, 2, node1, nullid)
1217 1219 f.add(b'3', None, tr, 3, node0, nullid)
1218 1220 f.add(b'4', None, tr, 4, node0, nullid)
1219 1221
1220 1222 for rev in range(5):
1221 1223 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1222 1224
1223 1225 def testgetstrippointearlierlinkrevs(self):
1224 1226 f = self._makefilefn()
1225 1227
1226 1228 with self._maketransactionfn() as tr:
1227 1229 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
1228 1230 f.add(b'1', None, tr, 10, node0, nullid)
1229 1231 f.add(b'2', None, tr, 5, node0, nullid)
1230 1232
1231 1233 self.assertEqual(f.getstrippoint(0), (0, set()))
1232 1234 self.assertEqual(f.getstrippoint(1), (1, set()))
1233 1235 self.assertEqual(f.getstrippoint(2), (1, set()))
1234 1236 self.assertEqual(f.getstrippoint(3), (1, set()))
1235 1237 self.assertEqual(f.getstrippoint(4), (1, set()))
1236 1238 self.assertEqual(f.getstrippoint(5), (1, set()))
1237 1239 self.assertEqual(f.getstrippoint(6), (1, {2}))
1238 1240 self.assertEqual(f.getstrippoint(7), (1, {2}))
1239 1241 self.assertEqual(f.getstrippoint(8), (1, {2}))
1240 1242 self.assertEqual(f.getstrippoint(9), (1, {2}))
1241 1243 self.assertEqual(f.getstrippoint(10), (1, {2}))
1242 1244 self.assertEqual(f.getstrippoint(11), (3, set()))
1243 1245
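# A reading of the assertions above: getstrippoint(minlink) returns
# (rev, brokenset), where stripping ``rev`` and everything after it
# removes all revisions with linkrev >= minlink, and brokenset holds
# stripped revisions whose linkrev was still below minlink (rev 2
# with linkrev 5, for minlink in 6..10 here).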
1244 1246 def teststripempty(self):
1245 1247 f = self._makefilefn()
1246 1248
1247 1249 with self._maketransactionfn() as tr:
1248 1250 f.strip(0, tr)
1249 1251
1250 1252 self.assertEqual(len(f), 0)
1251 1253
1252 1254 def teststripall(self):
1253 1255 f = self._makefilefn()
1254 1256
1255 1257 with self._maketransactionfn() as tr:
1256 1258 p1 = nullid
1257 1259 for rev in range(10):
1258 1260 p1 = f.add(b'%d' % rev, None, tr, rev, p1, nullid)
1259 1261
1260 1262 self.assertEqual(len(f), 10)
1261 1263
1262 1264 with self._maketransactionfn() as tr:
1263 1265 f.strip(0, tr)
1264 1266
1265 1267 self.assertEqual(len(f), 0)
1266 1268
1267 1269 def teststrippartial(self):
1268 1270 f = self._makefilefn()
1269 1271
1270 1272 with self._maketransactionfn() as tr:
1271 1273 f.add(b'0', None, tr, 0, nullid, nullid)
1272 1274 node1 = f.add(b'1', None, tr, 5, nullid, nullid)
1273 1275 node2 = f.add(b'2', None, tr, 10, nullid, nullid)
1274 1276
1275 1277 self.assertEqual(len(f), 3)
1276 1278
1277 1279 with self._maketransactionfn() as tr:
1278 1280 f.strip(11, tr)
1279 1281
1280 1282 self.assertEqual(len(f), 3)
1281 1283
1282 1284 with self._maketransactionfn() as tr:
1283 1285 f.strip(10, tr)
1284 1286
1285 1287 self.assertEqual(len(f), 2)
1286 1288
1287 1289 with self.assertRaises(error.LookupError):
1288 1290 f.rev(node2)
1289 1291
1290 1292 with self._maketransactionfn() as tr:
1291 1293 f.strip(6, tr)
1292 1294
1293 1295 self.assertEqual(len(f), 2)
1294 1296
1295 1297 with self._maketransactionfn() as tr:
1296 1298 f.strip(3, tr)
1297 1299
1298 1300 self.assertEqual(len(f), 1)
1299 1301
1300 1302 with self.assertRaises(error.LookupError):
1301 1303 f.rev(node1)
1302 1304
1303 1305 def makeifileindextests(makefilefn, maketransactionfn, addrawrevisionfn):
1304 1306 """Create a unittest.TestCase class suitable for testing file storage.
1305 1307
1306 1308 ``makefilefn`` is a callable which receives the test case as an
1307 1309 argument and returns an object implementing the ``ifilestorage`` interface.
1308 1310
1309 1311 ``maketransactionfn`` is a callable which receives the test case as an
1310 1312 argument and returns a transaction object.
1311 1313
1312 1314 ``addrawrevisionfn`` is a callable which receives arguments describing a
1313 1315 low-level revision to add. This callable allows the insertion of
1314 1316 potentially bad data into the store in order to facilitate testing.
1315 1317
1316 1318 Returns a type that is a ``unittest.TestCase`` that can be used for
1317 1319 testing the object implementing the file storage interface. Simply
1318 1320 assign the returned value to a module-level attribute and a test loader
1319 1321 should find and run it automatically.
1320 1322 """
1321 1323 d = {
1322 1324 r'_makefilefn': makefilefn,
1323 1325 r'_maketransactionfn': maketransactionfn,
1324 1326 r'_addrawrevisionfn': addrawrevisionfn,
1325 1327 }
1326 1328 return type(r'ifileindextests', (ifileindextests,), d)
1327 1329
1328 1330 def makeifiledatatests(makefilefn, maketransactionfn, addrawrevisionfn):
1329 1331 d = {
1330 1332 r'_makefilefn': makefilefn,
1331 1333 r'_maketransactionfn': maketransactionfn,
1332 1334 r'_addrawrevisionfn': addrawrevisionfn,
1333 1335 }
1334 1336 return type(r'ifiledatatests', (ifiledatatests,), d)
1335 1337
1336 1338 def makeifilemutationtests(makefilefn, maketransactionfn, addrawrevisionfn):
1337 1339 d = {
1338 1340 r'_makefilefn': makefilefn,
1339 1341 r'_maketransactionfn': maketransactionfn,
1340 1342 r'_addrawrevisionfn': addrawrevisionfn,
1341 1343 }
1342 1344 return type(r'ifilemutationtests', (ifilemutationtests,), d)