vfs: give all vfs an options attribute by default...
Author: marmoute
Changeset: r43295:3518da50 (default branch)
@@ -1,43 +1,44 @@
1 1 #!/usr/bin/env python
2 2 # Dump revlogs as raw data stream
3 3 # $ find .hg/store/ -name "*.i" | xargs dumprevlog > repo.dump
4 4
5 5 from __future__ import absolute_import, print_function
6 6
7 7 import sys
8 8 from mercurial import (
9 9 encoding,
10 10 node,
11 11 pycompat,
12 12 revlog,
13 13 )
14 14 from mercurial.utils import (
15 15 procutil,
16 16 )
17 17
18 18 for fp in (sys.stdin, sys.stdout, sys.stderr):
19 19 procutil.setbinary(fp)
20 20
21 21 def binopen(path, mode=b'rb'):
22 22 if b'b' not in mode:
23 23 mode = mode + b'b'
24 24 return open(path, pycompat.sysstr(mode))
25 + binopen.options = {}
25 26
26 27 def printb(data, end=b'\n'):
27 28 sys.stdout.flush()
28 29 pycompat.stdout.write(data + end)
29 30
30 31 for f in sys.argv[1:]:
31 32 r = revlog.revlog(binopen, encoding.strtolocal(f))
32 33 print("file:", f)
33 34 for i in r:
34 35 n = r.node(i)
35 36 p = r.parents(n)
36 37 d = r.revision(n)
37 38 printb(b"node: %s" % node.hex(n))
38 39 printb(b"linkrev: %d" % r.linkrev(i))
39 40 printb(b"parents: %s %s" % (node.hex(p[0]), node.hex(p[1])))
40 41 printb(b"length: %d" % len(d))
41 42 printb(b"-start-")
42 43 printb(d)
43 44 printb(b"-end-")
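The revlog change below drops the defensive getattr(self.opener, 'options', {}) lookups: every opener handed to revlog.revlog() is now expected to carry an ``options`` mapping, which is what the one-line ``binopen.options = {}`` addition above provides for this script. As a minimal sketch (not part of this patch; the factory name is made up), any ad-hoc opener can satisfy the new contract the same way, since Python functions accept arbitrary attributes:

    def makebinopen(options=None):
        """Return an open()-like callable carrying an ``options`` dict."""
        def binopen(path, mode=b'rb'):
            if b'b' not in mode:
                mode = mode + b'b'
            return open(path, mode.decode('ascii'))
        # revlog._loadindex() reads ``opener.options`` directly, so the
        # attribute must exist even when it is empty.
        binopen.options = options or {}
        return binopen

    # used exactly like the bare binopen in the script above, e.g.:
    # r = revlog.revlog(makebinopen(), encoding.strtolocal(f))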
@@ -1,2660 +1,2660 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 57 )
58 58 from .thirdparty import (
59 59 attr,
60 60 )
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 )
79 79 from .utils import (
80 80 storageutil,
81 81 stringutil,
82 82 )
83 83
84 84 # blanked usage of all the names to prevent pyflakes constraints
85 85 # We need these names available in the module for extensions.
86 86 REVLOGV0
87 87 REVLOGV1
88 88 REVLOGV2
89 89 FLAG_INLINE_DATA
90 90 FLAG_GENERALDELTA
91 91 REVLOG_DEFAULT_FLAGS
92 92 REVLOG_DEFAULT_FORMAT
93 93 REVLOG_DEFAULT_VERSION
94 94 REVLOGV1_FLAGS
95 95 REVLOGV2_FLAGS
96 96 REVIDX_ISCENSORED
97 97 REVIDX_ELLIPSIS
98 98 REVIDX_EXTSTORED
99 99 REVIDX_DEFAULT_FLAGS
100 100 REVIDX_FLAGS_ORDER
101 101 REVIDX_RAWTEXT_CHANGING_FLAGS
102 102
103 103 parsers = policy.importmod(r'parsers')
104 104 rustancestor = policy.importrust(r'ancestor')
105 105 rustdagop = policy.importrust(r'dagop')
106 106
107 107 # Aliased for performance.
108 108 _zlibdecompress = zlib.decompress
109 109
110 110 # max size of revlog with inline data
111 111 _maxinline = 131072
112 112 _chunksize = 1048576
113 113
114 114 # Flag processors for REVIDX_ELLIPSIS.
115 115 def ellipsisreadprocessor(rl, text):
116 116 return text, False, {}
117 117
118 118 def ellipsiswriteprocessor(rl, text, sidedata):
119 119 return text, False
120 120
121 121 def ellipsisrawprocessor(rl, text):
122 122 return False
123 123
124 124 ellipsisprocessor = (
125 125 ellipsisreadprocessor,
126 126 ellipsiswriteprocessor,
127 127 ellipsisrawprocessor,
128 128 )
129 129
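# Illustration only, not part of revlog.py: the ellipsis triple above shows
# the shape every flag processor takes -- a read, a write and a raw callable
# with these exact signatures. A hypothetical no-op processor for some
# extension-defined flag would mirror them; _loadindex() below registers such
# processors from the b'flagprocessors' opener option via
# flagutil.insertflagprocessor().

def noopreadprocessor(rl, text):
    # nothing to undo when reading back; no sidedata contributed
    return text, False, {}

def noopwriteprocessor(rl, text, sidedata):
    # nothing to transform before storage
    return text, False

def nooprawprocessor(rl, text):
    # the stored rawtext is not directly hash-checkable for this
    # (hypothetical) flag, mirroring the ellipsis case above
    return False

noopprocessor = (
    noopreadprocessor,
    noopwriteprocessor,
    nooprawprocessor,
)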
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
140 140
141 141 @attr.s(slots=True, frozen=True)
142 142 class _revisioninfo(object):
143 143 """Information about a revision that allows building its fulltext
144 144 node: expected hash of the revision
145 145 p1, p2: parent revs of the revision
146 146 btext: built text cache consisting of a one-element list
147 147 cachedelta: (baserev, uncompressed_delta) or None
148 148 flags: flags associated to the revision storage
149 149
150 150 One of btext[0] or cachedelta must be set.
151 151 """
152 152 node = attr.ib()
153 153 p1 = attr.ib()
154 154 p2 = attr.ib()
155 155 btext = attr.ib()
156 156 textlen = attr.ib()
157 157 cachedelta = attr.ib()
158 158 flags = attr.ib()
159 159
160 160 @interfaceutil.implementer(repository.irevisiondelta)
161 161 @attr.s(slots=True)
162 162 class revlogrevisiondelta(object):
163 163 node = attr.ib()
164 164 p1node = attr.ib()
165 165 p2node = attr.ib()
166 166 basenode = attr.ib()
167 167 flags = attr.ib()
168 168 baserevisionsize = attr.ib()
169 169 revision = attr.ib()
170 170 delta = attr.ib()
171 171 linknode = attr.ib(default=None)
172 172
173 173 @interfaceutil.implementer(repository.iverifyproblem)
174 174 @attr.s(frozen=True)
175 175 class revlogproblem(object):
176 176 warning = attr.ib(default=None)
177 177 error = attr.ib(default=None)
178 178 node = attr.ib(default=None)
179 179
180 180 # index v0:
181 181 # 4 bytes: offset
182 182 # 4 bytes: compressed length
183 183 # 4 bytes: base rev
184 184 # 4 bytes: link rev
185 185 # 20 bytes: parent 1 nodeid
186 186 # 20 bytes: parent 2 nodeid
187 187 # 20 bytes: nodeid
188 188 indexformatv0 = struct.Struct(">4l20s20s20s")
189 189 indexformatv0_pack = indexformatv0.pack
190 190 indexformatv0_unpack = indexformatv0.unpack
191 191
192 192 class revlogoldindex(list):
193 193 def __getitem__(self, i):
194 194 if i == -1:
195 195 return (0, 0, 0, -1, -1, -1, -1, nullid)
196 196 return list.__getitem__(self, i)
197 197
198 198 class revlogoldio(object):
199 199 def __init__(self):
200 200 self.size = indexformatv0.size
201 201
202 202 def parseindex(self, data, inline):
203 203 s = self.size
204 204 index = []
205 205 nodemap = {nullid: nullrev}
206 206 n = off = 0
207 207 l = len(data)
208 208 while off + s <= l:
209 209 cur = data[off:off + s]
210 210 off += s
211 211 e = indexformatv0_unpack(cur)
212 212 # transform to revlogv1 format
213 213 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
214 214 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
215 215 index.append(e2)
216 216 nodemap[e[6]] = n
217 217 n += 1
218 218
219 219 return revlogoldindex(index), nodemap, None
220 220
221 221 def packentry(self, entry, node, version, rev):
222 222 if gettype(entry[0]):
223 223 raise error.RevlogError(_('index entry flags need revlog '
224 224 'version 1'))
225 225 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
226 226 node(entry[5]), node(entry[6]), entry[7])
227 227 return indexformatv0_pack(*e2)
228 228
229 229 # index ng:
230 230 # 6 bytes: offset
231 231 # 2 bytes: flags
232 232 # 4 bytes: compressed length
233 233 # 4 bytes: uncompressed length
234 234 # 4 bytes: base rev
235 235 # 4 bytes: link rev
236 236 # 4 bytes: parent 1 rev
237 237 # 4 bytes: parent 2 rev
238 238 # 32 bytes: nodeid
239 239 indexformatng = struct.Struct(">Qiiiiii20s12x")
240 240 indexformatng_pack = indexformatng.pack
241 241 versionformat = struct.Struct(">I")
242 242 versionformat_pack = versionformat.pack
243 243 versionformat_unpack = versionformat.unpack
244 244
245 245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 246 # signed integer)
247 247 _maxentrysize = 0x7fffffff
248 248
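# Illustration only, not part of revlog.py (values invented, relies on the
# definitions above): offset_type() packs the 48-bit data offset and 16-bit
# flags into the first field of a v1 ("ng") index entry, and each packed
# entry occupies indexformatng.size (64) bytes on disk.

_example_entry = (
    offset_type(1024, 0),   # data offset 1024, no flags
    11,                     # compressed length
    23,                     # uncompressed length
    0,                      # base rev of the delta chain
    0,                      # link rev
    -1,                     # parent 1 rev (nullrev)
    -1,                     # parent 2 rev (nullrev)
    b'\x00' * 20,           # nodeid, zero-padded to 32 bytes on disk
)
assert len(indexformatng_pack(*_example_entry)) == indexformatng.size
assert getoffset(_example_entry[0]) == 1024
assert gettype(_example_entry[0]) == 0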
249 249 class revlogio(object):
250 250 def __init__(self):
251 251 self.size = indexformatng.size
252 252
253 253 def parseindex(self, data, inline):
254 254 # call the C implementation to parse the index data
255 255 index, cache = parsers.parse_index2(data, inline)
256 256 return index, getattr(index, 'nodemap', None), cache
257 257
258 258 def packentry(self, entry, node, version, rev):
259 259 p = indexformatng_pack(*entry)
260 260 if rev == 0:
261 261 p = versionformat_pack(version) + p[4:]
262 262 return p
263 263
264 264 class revlog(object):
265 265 """
266 266 the underlying revision storage object
267 267
268 268 A revlog consists of two parts, an index and the revision data.
269 269
270 270 The index is a file with a fixed record size containing
271 271 information on each revision, including its nodeid (hash), the
272 272 nodeids of its parents, the position and offset of its data within
273 273 the data file, and the revision it's based on. Finally, each entry
274 274 contains a linkrev entry that can serve as a pointer to external
275 275 data.
276 276
277 277 The revision data itself is a linear collection of data chunks.
278 278 Each chunk represents a revision and is usually represented as a
279 279 delta against the previous chunk. To bound lookup time, runs of
280 280 deltas are limited to about 2 times the length of the original
281 281 version data. This makes retrieval of a version proportional to
282 282 its size, or O(1) relative to the number of revisions.
283 283
284 284 Both pieces of the revlog are written to in an append-only
285 285 fashion, which means we never need to rewrite a file to insert or
286 286 remove data, and can use some simple techniques to avoid the need
287 287 for locking while reading.
288 288
289 289 If checkambig, indexfile is opened with checkambig=True at
290 290 writing, to avoid file stat ambiguity.
291 291
292 292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 293 index will be mmapped rather than read if it is larger than the
294 294 configured threshold.
295 295
296 296 If censorable is True, the revlog can have censored revisions.
297 297
298 298 If `upperboundcomp` is not None, this is the expected maximal gain from
299 299 compression for the data content.
300 300 """
301 301
302 302 _flagserrorclass = error.RevlogError
303 303
304 304 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
305 305 mmaplargeindex=False, censorable=False,
306 306 upperboundcomp=None):
307 307 """
308 308 create a revlog object
309 309
310 310 opener is a function that abstracts the file opening operation
311 311 and can be used to implement COW semantics or the like.
312 312
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile or (indexfile[:-2] + ".d")
317 317 self.opener = opener
318 318 # When True, indexfile is opened with checkambig=True at writing, to
319 319 # avoid file stat ambiguity.
320 320 self._checkambig = checkambig
321 321 self._mmaplargeindex = mmaplargeindex
322 322 self._censorable = censorable
323 323 # 3-tuple of (node, rev, text) for a raw revision.
324 324 self._revisioncache = None
325 325 # Maps rev to chain base rev.
326 326 self._chainbasecache = util.lrucachedict(100)
327 327 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
328 328 self._chunkcache = (0, '')
329 329 # How much data to read and cache into the raw revlog data cache.
330 330 self._chunkcachesize = 65536
331 331 self._maxchainlen = None
332 332 self._deltabothparents = True
333 333 self.index = []
334 334 # Mapping of partial identifiers to full nodes.
335 335 self._pcache = {}
336 336 # Mapping of revision integer to full node.
337 337 self._nodecache = {nullid: nullrev}
338 338 self._nodepos = None
339 339 self._compengine = 'zlib'
340 340 self._compengineopts = {}
341 341 self._maxdeltachainspan = -1
342 342 self._withsparseread = False
343 343 self._sparserevlog = False
344 344 self._srdensitythreshold = 0.50
345 345 self._srmingapsize = 262144
346 346
347 347 # Make copy of flag processors so each revlog instance can support
348 348 # custom flags.
349 349 self._flagprocessors = dict(flagutil.flagprocessors)
350 350
351 351 # 2-tuple of file handles being used for active writing.
352 352 self._writinghandles = None
353 353
354 354 self._loadindex()
355 355
356 356 def _loadindex(self):
357 357 mmapindexthreshold = None
358 -   opts = getattr(self.opener, 'options', {}) or {}
358 +   opts = self.opener.options
359 359
360 360 if 'revlogv2' in opts:
361 361 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
362 362 elif 'revlogv1' in opts:
363 363 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
364 364 if 'generaldelta' in opts:
365 365 newversionflags |= FLAG_GENERALDELTA
366 -   elif 'revlogv0' in getattr(self.opener, 'options', {}):
366 +   elif 'revlogv0' in self.opener.options:
367 367 newversionflags = REVLOGV0
368 368 else:
369 369 newversionflags = REVLOG_DEFAULT_VERSION
370 370
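        # Illustration only, not part of _loadindex(): with the vfs change in
        # this patch every opener carries an ``options`` mapping, so the lookup
        # above no longer needs getattr(). Those options are normally filled in
        # by localrepo from the repository requirements; a hand-built stand-in
        # (names made up) behaves the same way:
        #
        #     fakeopener = lambda *a, **kw: None      # never actually called
        #     fakeopener.options = {'revlogv1': True, 'generaldelta': True}
        #
        # Run through the chain above, those options select
        # REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA as newversionflags.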
371 371 if 'chunkcachesize' in opts:
372 372 self._chunkcachesize = opts['chunkcachesize']
373 373 if 'maxchainlen' in opts:
374 374 self._maxchainlen = opts['maxchainlen']
375 375 if 'deltabothparents' in opts:
376 376 self._deltabothparents = opts['deltabothparents']
377 377 self._lazydelta = bool(opts.get('lazydelta', True))
378 378 self._lazydeltabase = False
379 379 if self._lazydelta:
380 380 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
381 381 if 'compengine' in opts:
382 382 self._compengine = opts['compengine']
383 383 if 'zlib.level' in opts:
384 384 self._compengineopts['zlib.level'] = opts['zlib.level']
385 385 if 'zstd.level' in opts:
386 386 self._compengineopts['zstd.level'] = opts['zstd.level']
387 387 if 'maxdeltachainspan' in opts:
388 388 self._maxdeltachainspan = opts['maxdeltachainspan']
389 389 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
390 390 mmapindexthreshold = opts['mmapindexthreshold']
391 391 self._sparserevlog = bool(opts.get('sparse-revlog', False))
392 392 withsparseread = bool(opts.get('with-sparse-read', False))
393 393 # sparse-revlog forces sparse-read
394 394 self._withsparseread = self._sparserevlog or withsparseread
395 395 if 'sparse-read-density-threshold' in opts:
396 396 self._srdensitythreshold = opts['sparse-read-density-threshold']
397 397 if 'sparse-read-min-gap-size' in opts:
398 398 self._srmingapsize = opts['sparse-read-min-gap-size']
399 399 if opts.get('enableellipsis'):
400 400 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
401 401
402 402 # revlog v0 doesn't have flag processors
403 403 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
404 404 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
405 405
406 406 if self._chunkcachesize <= 0:
407 407 raise error.RevlogError(_('revlog chunk cache size %r is not '
408 408 'greater than 0') % self._chunkcachesize)
409 409 elif self._chunkcachesize & (self._chunkcachesize - 1):
410 410 raise error.RevlogError(_('revlog chunk cache size %r is not a '
411 411 'power of 2') % self._chunkcachesize)
412 412
413 413 indexdata = ''
414 414 self._initempty = True
415 415 try:
416 416 with self._indexfp() as f:
417 417 if (mmapindexthreshold is not None and
418 418 self.opener.fstat(f).st_size >= mmapindexthreshold):
419 419 # TODO: should .close() to release resources without
420 420 # relying on Python GC
421 421 indexdata = util.buffer(util.mmapread(f))
422 422 else:
423 423 indexdata = f.read()
424 424 if len(indexdata) > 0:
425 425 versionflags = versionformat_unpack(indexdata[:4])[0]
426 426 self._initempty = False
427 427 else:
428 428 versionflags = newversionflags
429 429 except IOError as inst:
430 430 if inst.errno != errno.ENOENT:
431 431 raise
432 432
433 433 versionflags = newversionflags
434 434
435 435 self.version = versionflags
436 436
437 437 flags = versionflags & ~0xFFFF
438 438 fmt = versionflags & 0xFFFF
439 439
440 440 if fmt == REVLOGV0:
441 441 if flags:
442 442 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
443 443 'revlog %s') %
444 444 (flags >> 16, fmt, self.indexfile))
445 445
446 446 self._inline = False
447 447 self._generaldelta = False
448 448
449 449 elif fmt == REVLOGV1:
450 450 if flags & ~REVLOGV1_FLAGS:
451 451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 452 'revlog %s') %
453 453 (flags >> 16, fmt, self.indexfile))
454 454
455 455 self._inline = versionflags & FLAG_INLINE_DATA
456 456 self._generaldelta = versionflags & FLAG_GENERALDELTA
457 457
458 458 elif fmt == REVLOGV2:
459 459 if flags & ~REVLOGV2_FLAGS:
460 460 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
461 461 'revlog %s') %
462 462 (flags >> 16, fmt, self.indexfile))
463 463
464 464 self._inline = versionflags & FLAG_INLINE_DATA
465 465 # generaldelta implied by version 2 revlogs.
466 466 self._generaldelta = True
467 467
468 468 else:
469 469 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
470 470 (fmt, self.indexfile))
471 471 # sparse-revlog can't be on without general-delta (issue6056)
472 472 if not self._generaldelta:
473 473 self._sparserevlog = False
474 474
475 475 self._storedeltachains = True
476 476
477 477 self._io = revlogio()
478 478 if self.version == REVLOGV0:
479 479 self._io = revlogoldio()
480 480 try:
481 481 d = self._io.parseindex(indexdata, self._inline)
482 482 except (ValueError, IndexError):
483 483 raise error.RevlogError(_("index %s is corrupted") %
484 484 self.indexfile)
485 485 self.index, nodemap, self._chunkcache = d
486 486 if nodemap is not None:
487 487 self.nodemap = self._nodecache = nodemap
488 488 if not self._chunkcache:
489 489 self._chunkclear()
490 490 # revnum -> (chain-length, sum-delta-length)
491 491 self._chaininfocache = {}
492 492 # revlog header -> revlog compressor
493 493 self._decompressors = {}
494 494
495 495 @util.propertycache
496 496 def _compressor(self):
497 497 engine = util.compengines[self._compengine]
498 498 return engine.revlogcompressor(self._compengineopts)
499 499
500 500 def _indexfp(self, mode='r'):
501 501 """file object for the revlog's index file"""
502 502 args = {r'mode': mode}
503 503 if mode != 'r':
504 504 args[r'checkambig'] = self._checkambig
505 505 if mode == 'w':
506 506 args[r'atomictemp'] = True
507 507 return self.opener(self.indexfile, **args)
508 508
509 509 def _datafp(self, mode='r'):
510 510 """file object for the revlog's data file"""
511 511 return self.opener(self.datafile, mode=mode)
512 512
513 513 @contextlib.contextmanager
514 514 def _datareadfp(self, existingfp=None):
515 515 """file object suitable to read data"""
516 516 # Use explicit file handle, if given.
517 517 if existingfp is not None:
518 518 yield existingfp
519 519
520 520 # Use a file handle being actively used for writes, if available.
521 521 # There is some danger to doing this because reads will seek the
522 522 # file. However, _writeentry() performs a SEEK_END before all writes,
523 523 # so we should be safe.
524 524 elif self._writinghandles:
525 525 if self._inline:
526 526 yield self._writinghandles[0]
527 527 else:
528 528 yield self._writinghandles[1]
529 529
530 530 # Otherwise open a new file handle.
531 531 else:
532 532 if self._inline:
533 533 func = self._indexfp
534 534 else:
535 535 func = self._datafp
536 536 with func() as fp:
537 537 yield fp
538 538
539 539 def tip(self):
540 540 return self.node(len(self.index) - 1)
541 541 def __contains__(self, rev):
542 542 return 0 <= rev < len(self)
543 543 def __len__(self):
544 544 return len(self.index)
545 545 def __iter__(self):
546 546 return iter(pycompat.xrange(len(self)))
547 547 def revs(self, start=0, stop=None):
548 548 """iterate over all rev in this revlog (from start to stop)"""
549 549 return storageutil.iterrevs(len(self), start=start, stop=stop)
550 550
551 551 @util.propertycache
552 552 def nodemap(self):
553 553 if self.index:
554 554 # populate mapping down to the initial node
555 555 node0 = self.index[0][7] # get around changelog filtering
556 556 self.rev(node0)
557 557 return self._nodecache
558 558
559 559 def hasnode(self, node):
560 560 try:
561 561 self.rev(node)
562 562 return True
563 563 except KeyError:
564 564 return False
565 565
566 566 def candelta(self, baserev, rev):
567 567 """whether two revisions (baserev, rev) can be delta-ed or not"""
568 568 # Disable delta if either rev requires a content-changing flag
569 569 # processor (ex. LFS). This is because such flag processor can alter
570 570 # the rawtext content that the delta will be based on, and two clients
571 571 # could have a same revlog node with different flags (i.e. different
572 572 # rawtext contents) and the delta could be incompatible.
573 573 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
574 574 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
575 575 return False
576 576 return True
577 577
578 578 def clearcaches(self):
579 579 self._revisioncache = None
580 580 self._chainbasecache.clear()
581 581 self._chunkcache = (0, '')
582 582 self._pcache = {}
583 583
584 584 try:
585 585 # If we are using the native C version, you are in a fun case
586 586 # where self.index, self.nodemap and self._nodecaches is the same
587 587 # object.
588 588 self._nodecache.clearcaches()
589 589 except AttributeError:
590 590 self._nodecache = {nullid: nullrev}
591 591 self._nodepos = None
592 592
593 593 def rev(self, node):
594 594 try:
595 595 return self._nodecache[node]
596 596 except TypeError:
597 597 raise
598 598 except error.RevlogError:
599 599 # parsers.c radix tree lookup failed
600 600 if node == wdirid or node in wdirfilenodeids:
601 601 raise error.WdirUnsupported
602 602 raise error.LookupError(node, self.indexfile, _('no node'))
603 603 except KeyError:
604 604 # pure python cache lookup failed
605 605 n = self._nodecache
606 606 i = self.index
607 607 p = self._nodepos
608 608 if p is None:
609 609 p = len(i) - 1
610 610 else:
611 611 assert p < len(i)
612 612 for r in pycompat.xrange(p, -1, -1):
613 613 v = i[r][7]
614 614 n[v] = r
615 615 if v == node:
616 616 self._nodepos = r - 1
617 617 return r
618 618 if node == wdirid or node in wdirfilenodeids:
619 619 raise error.WdirUnsupported
620 620 raise error.LookupError(node, self.indexfile, _('no node'))
621 621
622 622 # Accessors for index entries.
623 623
624 624 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
625 625 # are flags.
626 626 def start(self, rev):
627 627 return int(self.index[rev][0] >> 16)
628 628
629 629 def flags(self, rev):
630 630 return self.index[rev][0] & 0xFFFF
631 631
632 632 def length(self, rev):
633 633 return self.index[rev][1]
634 634
635 635 def rawsize(self, rev):
636 636 """return the length of the uncompressed text for a given revision"""
637 637 l = self.index[rev][2]
638 638 if l >= 0:
639 639 return l
640 640
641 641 t = self.rawdata(rev)
642 642 return len(t)
643 643
644 644 def size(self, rev):
645 645 """length of non-raw text (processed by a "read" flag processor)"""
646 646 # fast path: if no "read" flag processor could change the content,
647 647 # size is rawsize. note: ELLIPSIS is known to not change the content.
648 648 flags = self.flags(rev)
649 649 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
650 650 return self.rawsize(rev)
651 651
652 652 return len(self.revision(rev, raw=False))
653 653
654 654 def chainbase(self, rev):
655 655 base = self._chainbasecache.get(rev)
656 656 if base is not None:
657 657 return base
658 658
659 659 index = self.index
660 660 iterrev = rev
661 661 base = index[iterrev][3]
662 662 while base != iterrev:
663 663 iterrev = base
664 664 base = index[iterrev][3]
665 665
666 666 self._chainbasecache[rev] = base
667 667 return base
668 668
669 669 def linkrev(self, rev):
670 670 return self.index[rev][4]
671 671
672 672 def parentrevs(self, rev):
673 673 try:
674 674 entry = self.index[rev]
675 675 except IndexError:
676 676 if rev == wdirrev:
677 677 raise error.WdirUnsupported
678 678 raise
679 679
680 680 return entry[5], entry[6]
681 681
682 682 # fast parentrevs(rev) where rev isn't filtered
683 683 _uncheckedparentrevs = parentrevs
684 684
685 685 def node(self, rev):
686 686 try:
687 687 return self.index[rev][7]
688 688 except IndexError:
689 689 if rev == wdirrev:
690 690 raise error.WdirUnsupported
691 691 raise
692 692
693 693 # Derived from index values.
694 694
695 695 def end(self, rev):
696 696 return self.start(rev) + self.length(rev)
697 697
698 698 def parents(self, node):
699 699 i = self.index
700 700 d = i[self.rev(node)]
701 701 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
702 702
703 703 def chainlen(self, rev):
704 704 return self._chaininfo(rev)[0]
705 705
706 706 def _chaininfo(self, rev):
707 707 chaininfocache = self._chaininfocache
708 708 if rev in chaininfocache:
709 709 return chaininfocache[rev]
710 710 index = self.index
711 711 generaldelta = self._generaldelta
712 712 iterrev = rev
713 713 e = index[iterrev]
714 714 clen = 0
715 715 compresseddeltalen = 0
716 716 while iterrev != e[3]:
717 717 clen += 1
718 718 compresseddeltalen += e[1]
719 719 if generaldelta:
720 720 iterrev = e[3]
721 721 else:
722 722 iterrev -= 1
723 723 if iterrev in chaininfocache:
724 724 t = chaininfocache[iterrev]
725 725 clen += t[0]
726 726 compresseddeltalen += t[1]
727 727 break
728 728 e = index[iterrev]
729 729 else:
730 730 # Add text length of base since decompressing that also takes
731 731 # work. For cache hits the length is already included.
732 732 compresseddeltalen += e[1]
733 733 r = (clen, compresseddeltalen)
734 734 chaininfocache[rev] = r
735 735 return r
736 736
737 737 def _deltachain(self, rev, stoprev=None):
738 738 """Obtain the delta chain for a revision.
739 739
740 740 ``stoprev`` specifies a revision to stop at. If not specified, we
741 741 stop at the base of the chain.
742 742
743 743 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
744 744 revs in ascending order and ``stopped`` is a bool indicating whether
745 745 ``stoprev`` was hit.
746 746 """
747 747 # Try C implementation.
748 748 try:
749 749 return self.index.deltachain(rev, stoprev, self._generaldelta)
750 750 except AttributeError:
751 751 pass
752 752
753 753 chain = []
754 754
755 755 # Alias to prevent attribute lookup in tight loop.
756 756 index = self.index
757 757 generaldelta = self._generaldelta
758 758
759 759 iterrev = rev
760 760 e = index[iterrev]
761 761 while iterrev != e[3] and iterrev != stoprev:
762 762 chain.append(iterrev)
763 763 if generaldelta:
764 764 iterrev = e[3]
765 765 else:
766 766 iterrev -= 1
767 767 e = index[iterrev]
768 768
769 769 if iterrev == stoprev:
770 770 stopped = True
771 771 else:
772 772 chain.append(iterrev)
773 773 stopped = False
774 774
775 775 chain.reverse()
776 776 return chain, stopped
777 777
778 778 def ancestors(self, revs, stoprev=0, inclusive=False):
779 779 """Generate the ancestors of 'revs' in reverse revision order.
780 780 Does not generate revs lower than stoprev.
781 781
782 782 See the documentation for ancestor.lazyancestors for more details."""
783 783
784 784 # first, make sure start revisions aren't filtered
785 785 revs = list(revs)
786 786 checkrev = self.node
787 787 for r in revs:
788 788 checkrev(r)
789 789 # and we're sure ancestors aren't filtered as well
790 790
791 791 if rustancestor is not None:
792 792 lazyancestors = rustancestor.LazyAncestors
793 793 arg = self.index
794 794 elif util.safehasattr(parsers, 'rustlazyancestors'):
795 795 lazyancestors = ancestor.rustlazyancestors
796 796 arg = self.index
797 797 else:
798 798 lazyancestors = ancestor.lazyancestors
799 799 arg = self._uncheckedparentrevs
800 800 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
801 801
802 802 def descendants(self, revs):
803 803 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
804 804
805 805 def findcommonmissing(self, common=None, heads=None):
806 806 """Return a tuple of the ancestors of common and the ancestors of heads
807 807 that are not ancestors of common. In revset terminology, we return the
808 808 tuple:
809 809
810 810 ::common, (::heads) - (::common)
811 811
812 812 The list is sorted by revision number, meaning it is
813 813 topologically sorted.
814 814
815 815 'heads' and 'common' are both lists of node IDs. If heads is
816 816 not supplied, uses all of the revlog's heads. If common is not
817 817 supplied, uses nullid."""
818 818 if common is None:
819 819 common = [nullid]
820 820 if heads is None:
821 821 heads = self.heads()
822 822
823 823 common = [self.rev(n) for n in common]
824 824 heads = [self.rev(n) for n in heads]
825 825
826 826 # we want the ancestors, but inclusive
827 827 class lazyset(object):
828 828 def __init__(self, lazyvalues):
829 829 self.addedvalues = set()
830 830 self.lazyvalues = lazyvalues
831 831
832 832 def __contains__(self, value):
833 833 return value in self.addedvalues or value in self.lazyvalues
834 834
835 835 def __iter__(self):
836 836 added = self.addedvalues
837 837 for r in added:
838 838 yield r
839 839 for r in self.lazyvalues:
840 840 if not r in added:
841 841 yield r
842 842
843 843 def add(self, value):
844 844 self.addedvalues.add(value)
845 845
846 846 def update(self, values):
847 847 self.addedvalues.update(values)
848 848
849 849 has = lazyset(self.ancestors(common))
850 850 has.add(nullrev)
851 851 has.update(common)
852 852
853 853 # take all ancestors from heads that aren't in has
854 854 missing = set()
855 855 visit = collections.deque(r for r in heads if r not in has)
856 856 while visit:
857 857 r = visit.popleft()
858 858 if r in missing:
859 859 continue
860 860 else:
861 861 missing.add(r)
862 862 for p in self.parentrevs(r):
863 863 if p not in has:
864 864 visit.append(p)
865 865 missing = list(missing)
866 866 missing.sort()
867 867 return has, [self.node(miss) for miss in missing]
868 868
869 869 def incrementalmissingrevs(self, common=None):
870 870 """Return an object that can be used to incrementally compute the
871 871 revision numbers of the ancestors of arbitrary sets that are not
872 872 ancestors of common. This is an ancestor.incrementalmissingancestors
873 873 object.
874 874
875 875 'common' is a list of revision numbers. If common is not supplied, uses
876 876 nullrev.
877 877 """
878 878 if common is None:
879 879 common = [nullrev]
880 880
881 881 if rustancestor is not None:
882 882 return rustancestor.MissingAncestors(self.index, common)
883 883 return ancestor.incrementalmissingancestors(self.parentrevs, common)
884 884
885 885 def findmissingrevs(self, common=None, heads=None):
886 886 """Return the revision numbers of the ancestors of heads that
887 887 are not ancestors of common.
888 888
889 889 More specifically, return a list of revision numbers corresponding to
890 890 nodes N such that every N satisfies the following constraints:
891 891
892 892 1. N is an ancestor of some node in 'heads'
893 893 2. N is not an ancestor of any node in 'common'
894 894
895 895 The list is sorted by revision number, meaning it is
896 896 topologically sorted.
897 897
898 898 'heads' and 'common' are both lists of revision numbers. If heads is
899 899 not supplied, uses all of the revlog's heads. If common is not
900 900 supplied, uses nullid."""
901 901 if common is None:
902 902 common = [nullrev]
903 903 if heads is None:
904 904 heads = self.headrevs()
905 905
906 906 inc = self.incrementalmissingrevs(common=common)
907 907 return inc.missingancestors(heads)
908 908
909 909 def findmissing(self, common=None, heads=None):
910 910 """Return the ancestors of heads that are not ancestors of common.
911 911
912 912 More specifically, return a list of nodes N such that every N
913 913 satisfies the following constraints:
914 914
915 915 1. N is an ancestor of some node in 'heads'
916 916 2. N is not an ancestor of any node in 'common'
917 917
918 918 The list is sorted by revision number, meaning it is
919 919 topologically sorted.
920 920
921 921 'heads' and 'common' are both lists of node IDs. If heads is
922 922 not supplied, uses all of the revlog's heads. If common is not
923 923 supplied, uses nullid."""
924 924 if common is None:
925 925 common = [nullid]
926 926 if heads is None:
927 927 heads = self.heads()
928 928
929 929 common = [self.rev(n) for n in common]
930 930 heads = [self.rev(n) for n in heads]
931 931
932 932 inc = self.incrementalmissingrevs(common=common)
933 933 return [self.node(r) for r in inc.missingancestors(heads)]
934 934
935 935 def nodesbetween(self, roots=None, heads=None):
936 936 """Return a topological path from 'roots' to 'heads'.
937 937
938 938 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
939 939 topologically sorted list of all nodes N that satisfy both of
940 940 these constraints:
941 941
942 942 1. N is a descendant of some node in 'roots'
943 943 2. N is an ancestor of some node in 'heads'
944 944
945 945 Every node is considered to be both a descendant and an ancestor
946 946 of itself, so every reachable node in 'roots' and 'heads' will be
947 947 included in 'nodes'.
948 948
949 949 'outroots' is the list of reachable nodes in 'roots', i.e., the
950 950 subset of 'roots' that is returned in 'nodes'. Likewise,
951 951 'outheads' is the subset of 'heads' that is also in 'nodes'.
952 952
953 953 'roots' and 'heads' are both lists of node IDs. If 'roots' is
954 954 unspecified, uses nullid as the only root. If 'heads' is
955 955 unspecified, uses list of all of the revlog's heads."""
956 956 nonodes = ([], [], [])
957 957 if roots is not None:
958 958 roots = list(roots)
959 959 if not roots:
960 960 return nonodes
961 961 lowestrev = min([self.rev(n) for n in roots])
962 962 else:
963 963 roots = [nullid] # Everybody's a descendant of nullid
964 964 lowestrev = nullrev
965 965 if (lowestrev == nullrev) and (heads is None):
966 966 # We want _all_ the nodes!
967 967 return ([self.node(r) for r in self], [nullid], list(self.heads()))
968 968 if heads is None:
969 969 # All nodes are ancestors, so the latest ancestor is the last
970 970 # node.
971 971 highestrev = len(self) - 1
972 972 # Set ancestors to None to signal that every node is an ancestor.
973 973 ancestors = None
974 974 # Set heads to an empty dictionary for later discovery of heads
975 975 heads = {}
976 976 else:
977 977 heads = list(heads)
978 978 if not heads:
979 979 return nonodes
980 980 ancestors = set()
981 981 # Turn heads into a dictionary so we can remove 'fake' heads.
982 982 # Also, later we will be using it to filter out the heads we can't
983 983 # find from roots.
984 984 heads = dict.fromkeys(heads, False)
985 985 # Start at the top and keep marking parents until we're done.
986 986 nodestotag = set(heads)
987 987 # Remember where the top was so we can use it as a limit later.
988 988 highestrev = max([self.rev(n) for n in nodestotag])
989 989 while nodestotag:
990 990 # grab a node to tag
991 991 n = nodestotag.pop()
992 992 # Never tag nullid
993 993 if n == nullid:
994 994 continue
995 995 # A node's revision number represents its place in a
996 996 # topologically sorted list of nodes.
997 997 r = self.rev(n)
998 998 if r >= lowestrev:
999 999 if n not in ancestors:
1000 1000 # If we are possibly a descendant of one of the roots
1001 1001 # and we haven't already been marked as an ancestor
1002 1002 ancestors.add(n) # Mark as ancestor
1003 1003 # Add non-nullid parents to list of nodes to tag.
1004 1004 nodestotag.update([p for p in self.parents(n) if
1005 1005 p != nullid])
1006 1006 elif n in heads: # We've seen it before, is it a fake head?
1007 1007 # So it is, real heads should not be the ancestors of
1008 1008 # any other heads.
1009 1009 heads.pop(n)
1010 1010 if not ancestors:
1011 1011 return nonodes
1012 1012 # Now that we have our set of ancestors, we want to remove any
1013 1013 # roots that are not ancestors.
1014 1014
1015 1015 # If one of the roots was nullid, everything is included anyway.
1016 1016 if lowestrev > nullrev:
1017 1017 # But, since we weren't, let's recompute the lowest rev to not
1018 1018 # include roots that aren't ancestors.
1019 1019
1020 1020 # Filter out roots that aren't ancestors of heads
1021 1021 roots = [root for root in roots if root in ancestors]
1022 1022 # Recompute the lowest revision
1023 1023 if roots:
1024 1024 lowestrev = min([self.rev(root) for root in roots])
1025 1025 else:
1026 1026 # No more roots? Return empty list
1027 1027 return nonodes
1028 1028 else:
1029 1029 # We are descending from nullid, and don't need to care about
1030 1030 # any other roots.
1031 1031 lowestrev = nullrev
1032 1032 roots = [nullid]
1033 1033 # Transform our roots list into a set.
1034 1034 descendants = set(roots)
1035 1035 # Also, keep the original roots so we can filter out roots that aren't
1036 1036 # 'real' roots (i.e. are descended from other roots).
1037 1037 roots = descendants.copy()
1038 1038 # Our topologically sorted list of output nodes.
1039 1039 orderedout = []
1040 1040 # Don't start at nullid since we don't want nullid in our output list,
1041 1041 # and if nullid shows up in descendants, empty parents will look like
1042 1042 # they're descendants.
1043 1043 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1044 1044 n = self.node(r)
1045 1045 isdescendant = False
1046 1046 if lowestrev == nullrev: # Everybody is a descendant of nullid
1047 1047 isdescendant = True
1048 1048 elif n in descendants:
1049 1049 # n is already a descendant
1050 1050 isdescendant = True
1051 1051 # This check only needs to be done here because all the roots
1052 1052 # will start being marked as descendants before the loop.
1053 1053 if n in roots:
1054 1054 # If n was a root, check if it's a 'real' root.
1055 1055 p = tuple(self.parents(n))
1056 1056 # If any of its parents are descendants, it's not a root.
1057 1057 if (p[0] in descendants) or (p[1] in descendants):
1058 1058 roots.remove(n)
1059 1059 else:
1060 1060 p = tuple(self.parents(n))
1061 1061 # A node is a descendant if either of its parents are
1062 1062 # descendants. (We seeded the dependents list with the roots
1063 1063 # up there, remember?)
1064 1064 if (p[0] in descendants) or (p[1] in descendants):
1065 1065 descendants.add(n)
1066 1066 isdescendant = True
1067 1067 if isdescendant and ((ancestors is None) or (n in ancestors)):
1068 1068 # Only include nodes that are both descendants and ancestors.
1069 1069 orderedout.append(n)
1070 1070 if (ancestors is not None) and (n in heads):
1071 1071 # We're trying to figure out which heads are reachable
1072 1072 # from roots.
1073 1073 # Mark this head as having been reached
1074 1074 heads[n] = True
1075 1075 elif ancestors is None:
1076 1076 # Otherwise, we're trying to discover the heads.
1077 1077 # Assume this is a head because if it isn't, the next step
1078 1078 # will eventually remove it.
1079 1079 heads[n] = True
1080 1080 # But, obviously its parents aren't.
1081 1081 for p in self.parents(n):
1082 1082 heads.pop(p, None)
1083 1083 heads = [head for head, flag in heads.iteritems() if flag]
1084 1084 roots = list(roots)
1085 1085 assert orderedout
1086 1086 assert roots
1087 1087 assert heads
1088 1088 return (orderedout, roots, heads)
1089 1089
1090 1090 def headrevs(self, revs=None):
1091 1091 if revs is None:
1092 1092 try:
1093 1093 return self.index.headrevs()
1094 1094 except AttributeError:
1095 1095 return self._headrevs()
1096 1096 if rustdagop is not None:
1097 1097 return rustdagop.headrevs(self.index, revs)
1098 1098 return dagop.headrevs(revs, self._uncheckedparentrevs)
1099 1099
1100 1100 def computephases(self, roots):
1101 1101 return self.index.computephasesmapsets(roots)
1102 1102
1103 1103 def _headrevs(self):
1104 1104 count = len(self)
1105 1105 if not count:
1106 1106 return [nullrev]
1107 1107 # we won't iter over filtered rev so nobody is a head at start
1108 1108 ishead = [0] * (count + 1)
1109 1109 index = self.index
1110 1110 for r in self:
1111 1111 ishead[r] = 1 # I may be a head
1112 1112 e = index[r]
1113 1113 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1114 1114 return [r for r, val in enumerate(ishead) if val]
1115 1115
1116 1116 def heads(self, start=None, stop=None):
1117 1117 """return the list of all nodes that have no children
1118 1118
1119 1119 if start is specified, only heads that are descendants of
1120 1120 start will be returned
1121 1121 if stop is specified, it will consider all the revs from stop
1122 1122 as if they had no children
1123 1123 """
1124 1124 if start is None and stop is None:
1125 1125 if not len(self):
1126 1126 return [nullid]
1127 1127 return [self.node(r) for r in self.headrevs()]
1128 1128
1129 1129 if start is None:
1130 1130 start = nullrev
1131 1131 else:
1132 1132 start = self.rev(start)
1133 1133
1134 1134 stoprevs = set(self.rev(n) for n in stop or [])
1135 1135
1136 1136 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1137 1137 stoprevs=stoprevs)
1138 1138
1139 1139 return [self.node(rev) for rev in revs]
1140 1140
1141 1141 def children(self, node):
1142 1142 """find the children of a given node"""
1143 1143 c = []
1144 1144 p = self.rev(node)
1145 1145 for r in self.revs(start=p + 1):
1146 1146 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1147 1147 if prevs:
1148 1148 for pr in prevs:
1149 1149 if pr == p:
1150 1150 c.append(self.node(r))
1151 1151 elif p == nullrev:
1152 1152 c.append(self.node(r))
1153 1153 return c
1154 1154
1155 1155 def commonancestorsheads(self, a, b):
1156 1156 """calculate all the heads of the common ancestors of nodes a and b"""
1157 1157 a, b = self.rev(a), self.rev(b)
1158 1158 ancs = self._commonancestorsheads(a, b)
1159 1159 return pycompat.maplist(self.node, ancs)
1160 1160
1161 1161 def _commonancestorsheads(self, *revs):
1162 1162 """calculate all the heads of the common ancestors of revs"""
1163 1163 try:
1164 1164 ancs = self.index.commonancestorsheads(*revs)
1165 1165 except (AttributeError, OverflowError): # C implementation failed
1166 1166 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1167 1167 return ancs
1168 1168
1169 1169 def isancestor(self, a, b):
1170 1170 """return True if node a is an ancestor of node b
1171 1171
1172 1172 A revision is considered an ancestor of itself."""
1173 1173 a, b = self.rev(a), self.rev(b)
1174 1174 return self.isancestorrev(a, b)
1175 1175
1176 1176 def isancestorrev(self, a, b):
1177 1177 """return True if revision a is an ancestor of revision b
1178 1178
1179 1179 A revision is considered an ancestor of itself.
1180 1180
1181 1181 The implementation of this is trivial but the use of
1182 1182 reachableroots is not."""
1183 1183 if a == nullrev:
1184 1184 return True
1185 1185 elif a == b:
1186 1186 return True
1187 1187 elif a > b:
1188 1188 return False
1189 1189 return bool(self.reachableroots(a, [b], [a], includepath=False))
1190 1190
1191 1191 def reachableroots(self, minroot, heads, roots, includepath=False):
1192 1192 """return (heads(::<roots> and <roots>::<heads>))
1193 1193
1194 1194 If includepath is True, return (<roots>::<heads>)."""
1195 1195 try:
1196 1196 return self.index.reachableroots2(minroot, heads, roots,
1197 1197 includepath)
1198 1198 except AttributeError:
1199 1199 return dagop._reachablerootspure(self.parentrevs,
1200 1200 minroot, roots, heads, includepath)
1201 1201
1202 1202 def ancestor(self, a, b):
1203 1203 """calculate the "best" common ancestor of nodes a and b"""
1204 1204
1205 1205 a, b = self.rev(a), self.rev(b)
1206 1206 try:
1207 1207 ancs = self.index.ancestors(a, b)
1208 1208 except (AttributeError, OverflowError):
1209 1209 ancs = ancestor.ancestors(self.parentrevs, a, b)
1210 1210 if ancs:
1211 1211 # choose a consistent winner when there's a tie
1212 1212 return min(map(self.node, ancs))
1213 1213 return nullid
1214 1214
1215 1215 def _match(self, id):
1216 1216 if isinstance(id, int):
1217 1217 # rev
1218 1218 return self.node(id)
1219 1219 if len(id) == 20:
1220 1220 # possibly a binary node
1221 1221 # odds of a binary node being all hex in ASCII are 1 in 10**25
1222 1222 try:
1223 1223 node = id
1224 1224 self.rev(node) # quick search the index
1225 1225 return node
1226 1226 except error.LookupError:
1227 1227 pass # may be partial hex id
1228 1228 try:
1229 1229 # str(rev)
1230 1230 rev = int(id)
1231 1231 if "%d" % rev != id:
1232 1232 raise ValueError
1233 1233 if rev < 0:
1234 1234 rev = len(self) + rev
1235 1235 if rev < 0 or rev >= len(self):
1236 1236 raise ValueError
1237 1237 return self.node(rev)
1238 1238 except (ValueError, OverflowError):
1239 1239 pass
1240 1240 if len(id) == 40:
1241 1241 try:
1242 1242 # a full hex nodeid?
1243 1243 node = bin(id)
1244 1244 self.rev(node)
1245 1245 return node
1246 1246 except (TypeError, error.LookupError):
1247 1247 pass
1248 1248
1249 1249 def _partialmatch(self, id):
1250 1250 # we don't care about wdirfilenodeids as they should always be full hashes
1251 1251 maybewdir = wdirhex.startswith(id)
1252 1252 try:
1253 1253 partial = self.index.partialmatch(id)
1254 1254 if partial and self.hasnode(partial):
1255 1255 if maybewdir:
1256 1256 # single 'ff...' match in radix tree, ambiguous with wdir
1257 1257 raise error.RevlogError
1258 1258 return partial
1259 1259 if maybewdir:
1260 1260 # no 'ff...' match in radix tree, wdir identified
1261 1261 raise error.WdirUnsupported
1262 1262 return None
1263 1263 except error.RevlogError:
1264 1264 # parsers.c radix tree lookup gave multiple matches
1265 1265 # fast path: for unfiltered changelog, radix tree is accurate
1266 1266 if not getattr(self, 'filteredrevs', None):
1267 1267 raise error.AmbiguousPrefixLookupError(
1268 1268 id, self.indexfile, _('ambiguous identifier'))
1269 1269 # fall through to slow path that filters hidden revisions
1270 1270 except (AttributeError, ValueError):
1271 1271 # we are pure python, or key was too short to search radix tree
1272 1272 pass
1273 1273
1274 1274 if id in self._pcache:
1275 1275 return self._pcache[id]
1276 1276
1277 1277 if len(id) <= 40:
1278 1278 try:
1279 1279 # hex(node)[:...]
1280 1280 l = len(id) // 2 # grab an even number of digits
1281 1281 prefix = bin(id[:l * 2])
1282 1282 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1283 1283 nl = [n for n in nl if hex(n).startswith(id) and
1284 1284 self.hasnode(n)]
1285 1285 if nullhex.startswith(id):
1286 1286 nl.append(nullid)
1287 1287 if len(nl) > 0:
1288 1288 if len(nl) == 1 and not maybewdir:
1289 1289 self._pcache[id] = nl[0]
1290 1290 return nl[0]
1291 1291 raise error.AmbiguousPrefixLookupError(
1292 1292 id, self.indexfile, _('ambiguous identifier'))
1293 1293 if maybewdir:
1294 1294 raise error.WdirUnsupported
1295 1295 return None
1296 1296 except TypeError:
1297 1297 pass
1298 1298
1299 1299 def lookup(self, id):
1300 1300 """locate a node based on:
1301 1301 - revision number or str(revision number)
1302 1302 - nodeid or subset of hex nodeid
1303 1303 """
1304 1304 n = self._match(id)
1305 1305 if n is not None:
1306 1306 return n
1307 1307 n = self._partialmatch(id)
1308 1308 if n:
1309 1309 return n
1310 1310
1311 1311 raise error.LookupError(id, self.indexfile, _('no match found'))
1312 1312
1313 1313 def shortest(self, node, minlength=1):
1314 1314 """Find the shortest unambiguous prefix that matches node."""
1315 1315 def isvalid(prefix):
1316 1316 try:
1317 1317 matchednode = self._partialmatch(prefix)
1318 1318 except error.AmbiguousPrefixLookupError:
1319 1319 return False
1320 1320 except error.WdirUnsupported:
1321 1321 # single 'ff...' match
1322 1322 return True
1323 1323 if matchednode is None:
1324 1324 raise error.LookupError(node, self.indexfile, _('no node'))
1325 1325 return True
1326 1326
1327 1327 def maybewdir(prefix):
1328 1328 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1329 1329
1330 1330 hexnode = hex(node)
1331 1331
1332 1332 def disambiguate(hexnode, minlength):
1333 1333 """Disambiguate against wdirid."""
1334 1334 for length in range(minlength, 41):
1335 1335 prefix = hexnode[:length]
1336 1336 if not maybewdir(prefix):
1337 1337 return prefix
1338 1338
1339 1339 if not getattr(self, 'filteredrevs', None):
1340 1340 try:
1341 1341 length = max(self.index.shortest(node), minlength)
1342 1342 return disambiguate(hexnode, length)
1343 1343 except error.RevlogError:
1344 1344 if node != wdirid:
1345 1345 raise error.LookupError(node, self.indexfile, _('no node'))
1346 1346 except AttributeError:
1347 1347 # Fall through to pure code
1348 1348 pass
1349 1349
1350 1350 if node == wdirid:
1351 1351 for length in range(minlength, 41):
1352 1352 prefix = hexnode[:length]
1353 1353 if isvalid(prefix):
1354 1354 return prefix
1355 1355
1356 1356 for length in range(minlength, 41):
1357 1357 prefix = hexnode[:length]
1358 1358 if isvalid(prefix):
1359 1359 return disambiguate(hexnode, length)
1360 1360
1361 1361 def cmp(self, node, text):
1362 1362 """compare text with a given file revision
1363 1363
1364 1364 returns True if text is different than what is stored.
1365 1365 """
1366 1366 p1, p2 = self.parents(node)
1367 1367 return storageutil.hashrevisionsha1(text, p1, p2) != node
1368 1368
1369 1369 def _cachesegment(self, offset, data):
1370 1370 """Add a segment to the revlog cache.
1371 1371
1372 1372 Accepts an absolute offset and the data that is at that location.
1373 1373 """
1374 1374 o, d = self._chunkcache
1375 1375 # try to add to existing cache
1376 1376 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1377 1377 self._chunkcache = o, d + data
1378 1378 else:
1379 1379 self._chunkcache = offset, data
1380 1380
1381 1381 def _readsegment(self, offset, length, df=None):
1382 1382 """Load a segment of raw data from the revlog.
1383 1383
1384 1384 Accepts an absolute offset, length to read, and an optional existing
1385 1385 file handle to read from.
1386 1386
1387 1387 If an existing file handle is passed, it will be seeked and the
1388 1388 original seek position will NOT be restored.
1389 1389
1390 1390 Returns a str or buffer of raw byte data.
1391 1391
1392 1392 Raises if the requested number of bytes could not be read.
1393 1393 """
1394 1394 # Cache data both forward and backward around the requested
1395 1395 # data, in a fixed size window. This helps speed up operations
1396 1396 # involving reading the revlog backwards.
1397 1397 cachesize = self._chunkcachesize
1398 1398 realoffset = offset & ~(cachesize - 1)
1399 1399 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1400 1400 - realoffset)
1401 1401 with self._datareadfp(df) as df:
1402 1402 df.seek(realoffset)
1403 1403 d = df.read(reallength)
1404 1404
1405 1405 self._cachesegment(realoffset, d)
1406 1406 if offset != realoffset or reallength != length:
1407 1407 startoffset = offset - realoffset
1408 1408 if len(d) - startoffset < length:
1409 1409 raise error.RevlogError(
1410 1410 _('partial read of revlog %s; expected %d bytes from '
1411 1411 'offset %d, got %d') %
1412 1412 (self.indexfile if self._inline else self.datafile,
1413 1413 length, realoffset, len(d) - startoffset))
1414 1414
1415 1415 return util.buffer(d, startoffset, length)
1416 1416
1417 1417 if len(d) < length:
1418 1418 raise error.RevlogError(
1419 1419 _('partial read of revlog %s; expected %d bytes from offset '
1420 1420 '%d, got %d') %
1421 1421 (self.indexfile if self._inline else self.datafile,
1422 1422 length, offset, len(d)))
1423 1423
1424 1424 return d
1425 1425
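    # Illustration only, not part of revlog.py: _readsegment() above widens
    # every request to a window aligned on _chunkcachesize so neighbouring
    # reads are served from the chunk cache. With the default 64 KiB window
    # (the _chunkcachesize set in __init__), a 100-byte read at offset 70000
    # becomes one aligned 64 KiB window (values invented):
    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    assert (realoffset, reallength) == (65536, 65536)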
1426 1426 def _getsegment(self, offset, length, df=None):
1427 1427 """Obtain a segment of raw data from the revlog.
1428 1428
1429 1429 Accepts an absolute offset, length of bytes to obtain, and an
1430 1430 optional file handle to the already-opened revlog. If the file
1431 1431 handle is used, its original seek position will not be preserved.
1432 1432
1433 1433 Requests for data may be returned from a cache.
1434 1434
1435 1435 Returns a str or a buffer instance of raw byte data.
1436 1436 """
1437 1437 o, d = self._chunkcache
1438 1438 l = len(d)
1439 1439
1440 1440 # is it in the cache?
1441 1441 cachestart = offset - o
1442 1442 cacheend = cachestart + length
1443 1443 if cachestart >= 0 and cacheend <= l:
1444 1444 if cachestart == 0 and cacheend == l:
1445 1445 return d # avoid a copy
1446 1446 return util.buffer(d, cachestart, cacheend - cachestart)
1447 1447
1448 1448 return self._readsegment(offset, length, df=df)
1449 1449
1450 1450 def _getsegmentforrevs(self, startrev, endrev, df=None):
1451 1451 """Obtain a segment of raw data corresponding to a range of revisions.
1452 1452
1453 1453 Accepts the start and end revisions and an optional already-open
1454 1454 file handle to be used for reading. If the file handle is read, its
1455 1455 seek position will not be preserved.
1456 1456
1457 1457 Requests for data may be satisfied by a cache.
1458 1458
1459 1459 Returns a 2-tuple of (offset, data) for the requested range of
1460 1460 revisions. Offset is the integer offset from the beginning of the
1461 1461 revlog and data is a str or buffer of the raw byte data.
1462 1462
1463 1463 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1464 1464 to determine where each revision's data begins and ends.
1465 1465 """
1466 1466 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1467 1467 # (functions are expensive).
1468 1468 index = self.index
1469 1469 istart = index[startrev]
1470 1470 start = int(istart[0] >> 16)
1471 1471 if startrev == endrev:
1472 1472 end = start + istart[1]
1473 1473 else:
1474 1474 iend = index[endrev]
1475 1475 end = int(iend[0] >> 16) + iend[1]
1476 1476
1477 1477 if self._inline:
1478 1478 start += (startrev + 1) * self._io.size
1479 1479 end += (endrev + 1) * self._io.size
1480 1480 length = end - start
1481 1481
1482 1482 return start, self._getsegment(start, length, df=df)
1483 1483
1484 1484 def _chunk(self, rev, df=None):
1485 1485 """Obtain a single decompressed chunk for a revision.
1486 1486
1487 1487 Accepts an integer revision and an optional already-open file handle
1488 1488 to be used for reading. If used, the seek position of the file will not
1489 1489 be preserved.
1490 1490
1491 1491 Returns a str holding uncompressed data for the requested revision.
1492 1492 """
1493 1493 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1494 1494
1495 1495 def _chunks(self, revs, df=None, targetsize=None):
1496 1496 """Obtain decompressed chunks for the specified revisions.
1497 1497
1498 1498 Accepts an iterable of numeric revisions that are assumed to be in
1499 1499 ascending order. Also accepts an optional already-open file handle
1500 1500 to be used for reading. If used, the seek position of the file will
1501 1501 not be preserved.
1502 1502
1503 1503 This function is similar to calling ``self._chunk()`` multiple times,
1504 1504 but is faster.
1505 1505
1506 1506 Returns a list with decompressed data for each requested revision.
1507 1507 """
1508 1508 if not revs:
1509 1509 return []
1510 1510 start = self.start
1511 1511 length = self.length
1512 1512 inline = self._inline
1513 1513 iosize = self._io.size
1514 1514 buffer = util.buffer
1515 1515
1516 1516 l = []
1517 1517 ladd = l.append
1518 1518
1519 1519 if not self._withsparseread:
1520 1520 slicedchunks = (revs,)
1521 1521 else:
1522 1522 slicedchunks = deltautil.slicechunk(self, revs,
1523 1523 targetsize=targetsize)
1524 1524
1525 1525 for revschunk in slicedchunks:
1526 1526 firstrev = revschunk[0]
1527 1527 # Skip trailing revisions with empty diff
1528 1528 for lastrev in revschunk[::-1]:
1529 1529 if length(lastrev) != 0:
1530 1530 break
1531 1531
1532 1532 try:
1533 1533 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1534 1534 except OverflowError:
1535 1535 # issue4215 - we can't cache a run of chunks greater than
1536 1536 # 2G on Windows
1537 1537 return [self._chunk(rev, df=df) for rev in revschunk]
1538 1538
1539 1539 decomp = self.decompress
1540 1540 for rev in revschunk:
1541 1541 chunkstart = start(rev)
1542 1542 if inline:
1543 1543 chunkstart += (rev + 1) * iosize
1544 1544 chunklength = length(rev)
1545 1545 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1546 1546
1547 1547 return l
1548 1548
1549 1549 def _chunkclear(self):
1550 1550 """Clear the raw chunk cache."""
1551 1551 self._chunkcache = (0, '')
1552 1552
1553 1553 def deltaparent(self, rev):
1554 1554 """return deltaparent of the given revision"""
1555 1555 base = self.index[rev][3]
1556 1556 if base == rev:
1557 1557 return nullrev
1558 1558 elif self._generaldelta:
1559 1559 return base
1560 1560 else:
1561 1561 return rev - 1
1562 1562
1563 1563 def issnapshot(self, rev):
1564 1564 """tells whether rev is a snapshot
1565 1565 """
1566 1566 if not self._sparserevlog:
1567 1567 return self.deltaparent(rev) == nullrev
1568 1568 elif util.safehasattr(self.index, 'issnapshot'):
1569 1569 # directly assign the method to cache the testing and access
1570 1570 self.issnapshot = self.index.issnapshot
1571 1571 return self.issnapshot(rev)
1572 1572 if rev == nullrev:
1573 1573 return True
1574 1574 entry = self.index[rev]
1575 1575 base = entry[3]
1576 1576 if base == rev:
1577 1577 return True
1578 1578 if base == nullrev:
1579 1579 return True
1580 1580 p1 = entry[5]
1581 1581 p2 = entry[6]
1582 1582 if base == p1 or base == p2:
1583 1583 return False
1584 1584 return self.issnapshot(base)
1585 1585
1586 1586 def snapshotdepth(self, rev):
1587 1587 """number of snapshots in the chain before this one"""
1588 1588 if not self.issnapshot(rev):
1589 1589 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1590 1590 return len(self._deltachain(rev)[0]) - 1
1591 1591
1592 1592 def revdiff(self, rev1, rev2):
1593 1593 """return or calculate a delta between two revisions
1594 1594
1595 1595 The delta calculated is in binary form and is intended to be written to
1596 1596 revlog data directly. So this function needs raw revision data.
1597 1597 """
1598 1598 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1599 1599 return bytes(self._chunk(rev2))
1600 1600
1601 1601 return mdiff.textdiff(self.rawdata(rev1),
1602 1602 self.rawdata(rev2))
1603 1603
1604 1604 def _processflags(self, text, flags, operation, raw=False):
1605 1605 """deprecated entry point to access flag processors"""
1606 1606 msg = ('_processflags(...) use the specialized variant')
1607 1607 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1608 1608 if raw:
1609 1609 return text, flagutil.processflagsraw(self, text, flags)
1610 1610 elif operation == 'read':
1611 1611 return flagutil.processflagsread(self, text, flags)
1612 1612 else: # write operation
1613 1613 return flagutil.processflagswrite(self, text, flags)
1614 1614
1615 1615 def revision(self, nodeorrev, _df=None, raw=False):
1616 1616 """return an uncompressed revision of a given node or revision
1617 1617 number.
1618 1618
1619 1619 _df - an existing file handle to read from. (internal-only)
1620 1620 raw - an optional argument specifying if the revision data is to be
1621 1621 treated as raw data when applying flag transforms. 'raw' should be set
1622 1622 to True when generating changegroups or in debug commands.
1623 1623 """
1624 1624 if raw:
1625 1625 msg = ('revlog.revision(..., raw=True) is deprecated, '
1626 1626 'use revlog.rawdata(...)')
1627 1627 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1628 1628 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1629 1629
1630 1630 def sidedata(self, nodeorrev, _df=None):
1631 1631 """a map of extra data related to the changeset but not part of the hash
1632 1632
1633 1633 This function currently returns a dictionary. However, a more advanced
1634 1634 mapping object will likely be used in the future for more
1635 1635 efficient/lazy code.
1636 1636 """
1637 1637 return self._revisiondata(nodeorrev, _df)[1]
1638 1638
1639 1639 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1640 1640 # deal with <nodeorrev> argument type
1641 1641 if isinstance(nodeorrev, int):
1642 1642 rev = nodeorrev
1643 1643 node = self.node(rev)
1644 1644 else:
1645 1645 node = nodeorrev
1646 1646 rev = None
1647 1647
1648 1648 # fast path the special `nullid` rev
1649 1649 if node == nullid:
1650 1650 return "", {}
1651 1651
1652 1652 # The text as stored inside the revlog. Might be the revision or might
1653 1653 # need to be processed to retrieve the revision.
1654 1654 rawtext = None
1655 1655
1656 1656 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1657 1657
1658 1658 if raw and validated:
1659 1659 # if we don't want to process the raw text and that raw
1660 1660 # text is cached, we can exit early.
1661 1661 return rawtext, {}
1662 1662 if rev is None:
1663 1663 rev = self.rev(node)
1664 1664 # the revlog flags for this revision
1665 1665 # (these usually alter its state or content)
1666 1666 flags = self.flags(rev)
1667 1667
1668 1668 if validated and flags == REVIDX_DEFAULT_FLAGS:
1669 1669 # no extra flags set, no flag processor runs, text = rawtext
1670 1670 return rawtext, {}
1671 1671
1672 1672 sidedata = {}
1673 1673 if raw:
1674 1674 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1675 1675 text = rawtext
1676 1676 else:
1677 1677 r = flagutil.processflagsread(self, rawtext, flags)
1678 1678 text, validatehash, sidedata = r
1679 1679 if validatehash:
1680 1680 self.checkhash(text, node, rev=rev)
1681 1681 if not validated:
1682 1682 self._revisioncache = (node, rev, rawtext)
1683 1683
1684 1684 return text, sidedata
1685 1685
1686 1686 def _rawtext(self, node, rev, _df=None):
1687 1687 """return the possibly unvalidated rawtext for a revision
1688 1688
1689 1689 returns (rev, rawtext, validated)
1690 1690 """
1691 1691
1692 1692 # revision in the cache (could be useful to apply delta)
1693 1693 cachedrev = None
1694 1694 # An intermediate text to apply deltas to
1695 1695 basetext = None
1696 1696
1697 1697 # Check if we have the entry in cache
1698 1698 # The cache entry looks like (node, rev, rawtext)
1699 1699 if self._revisioncache:
1700 1700 if self._revisioncache[0] == node:
1701 1701 return (rev, self._revisioncache[2], True)
1702 1702 cachedrev = self._revisioncache[1]
1703 1703
1704 1704 if rev is None:
1705 1705 rev = self.rev(node)
1706 1706
1707 1707 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1708 1708 if stopped:
1709 1709 basetext = self._revisioncache[2]
1710 1710
1711 1711 # drop cache to save memory, the caller is expected to
1712 1712 # update self._revisioncache after validating the text
1713 1713 self._revisioncache = None
1714 1714
1715 1715 targetsize = None
1716 1716 rawsize = self.index[rev][2]
1717 1717 if 0 <= rawsize:
1718 1718 targetsize = 4 * rawsize
1719 1719
1720 1720 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1721 1721 if basetext is None:
1722 1722 basetext = bytes(bins[0])
1723 1723 bins = bins[1:]
1724 1724
1725 1725 rawtext = mdiff.patches(basetext, bins)
1726 1726 del basetext # let us have a chance to free memory early
1727 1727 return (rev, rawtext, False)
1728 1728
1729 1729 def rawdata(self, nodeorrev, _df=None):
1730 1730 """return an uncompressed raw data of a given node or revision number.
1731 1731
1732 1732 _df - an existing file handle to read from. (internal-only)
1733 1733 """
1734 1734 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1735 1735
1736 1736 def hash(self, text, p1, p2):
1737 1737 """Compute a node hash.
1738 1738
1739 1739 Available as a function so that subclasses can replace the hash
1740 1740 as needed.
1741 1741 """
1742 1742 return storageutil.hashrevisionsha1(text, p1, p2)
1743 1743
1744 1744 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1745 1745 """Check node hash integrity.
1746 1746
1747 1747 Available as a function so that subclasses can extend hash mismatch
1748 1748 behaviors as needed.
1749 1749 """
1750 1750 try:
1751 1751 if p1 is None and p2 is None:
1752 1752 p1, p2 = self.parents(node)
1753 1753 if node != self.hash(text, p1, p2):
1754 1754 # Clear the revision cache on hash failure. The revision cache
1755 1755 # only stores the raw revision and clearing the cache does have
1756 1756 # the side-effect that we won't have a cache hit when the raw
1757 1757 # revision data is accessed. But this case should be rare and
1758 1758 # it is extra work to teach the cache about the hash
1759 1759 # verification state.
1760 1760 if self._revisioncache and self._revisioncache[0] == node:
1761 1761 self._revisioncache = None
1762 1762
1763 1763 revornode = rev
1764 1764 if revornode is None:
1765 1765 revornode = templatefilters.short(hex(node))
1766 1766 raise error.RevlogError(_("integrity check failed on %s:%s")
1767 1767 % (self.indexfile, pycompat.bytestr(revornode)))
1768 1768 except error.RevlogError:
1769 1769 if self._censorable and storageutil.iscensoredtext(text):
1770 1770 raise error.CensoredNodeError(self.indexfile, node, text)
1771 1771 raise
1772 1772
1773 1773 def _enforceinlinesize(self, tr, fp=None):
1774 1774 """Check if the revlog is too big for inline and convert if so.
1775 1775
1776 1776 This should be called after revisions are added to the revlog. If the
1777 1777 revlog has grown too large to be an inline revlog, it will convert it
1778 1778 to use multiple index and data files.
1779 1779 """
1780 1780 tiprev = len(self) - 1
1781 1781 if (not self._inline or
1782 1782 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1783 1783 return
1784 1784
1785 1785 trinfo = tr.find(self.indexfile)
1786 1786 if trinfo is None:
1787 1787 raise error.RevlogError(_("%s not found in the transaction")
1788 1788 % self.indexfile)
1789 1789
1790 1790 trindex = trinfo[2]
1791 1791 if trindex is not None:
1792 1792 dataoff = self.start(trindex)
1793 1793 else:
1794 1794 # revlog was stripped at start of transaction, use all leftover data
1795 1795 trindex = len(self) - 1
1796 1796 dataoff = self.end(tiprev)
1797 1797
1798 1798 tr.add(self.datafile, dataoff)
1799 1799
1800 1800 if fp:
1801 1801 fp.flush()
1802 1802 fp.close()
1803 1803 # We can't use the cached file handle after close(). So prevent
1804 1804 # its usage.
1805 1805 self._writinghandles = None
1806 1806
1807 1807 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1808 1808 for r in self:
1809 1809 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1810 1810
1811 1811 with self._indexfp('w') as fp:
1812 1812 self.version &= ~FLAG_INLINE_DATA
1813 1813 self._inline = False
1814 1814 io = self._io
1815 1815 for i in self:
1816 1816 e = io.packentry(self.index[i], self.node, self.version, i)
1817 1817 fp.write(e)
1818 1818
1819 1819 # the temp file replaces the real index when we exit the context
1820 1820 # manager
1821 1821
1822 1822 tr.replace(self.indexfile, trindex * self._io.size)
1823 1823 self._chunkclear()
1824 1824
1825 1825 def _nodeduplicatecallback(self, transaction, node):
1826 1826 """called when trying to add a node already stored.
1827 1827 """
1828 1828
1829 1829 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1830 1830 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1831 1831 sidedata=None):
1832 1832 """add a revision to the log
1833 1833
1834 1834 text - the revision data to add
1835 1835 transaction - the transaction object used for rollback
1836 1836 link - the linkrev data to add
1837 1837 p1, p2 - the parent nodeids of the revision
1838 1838 cachedelta - an optional precomputed delta
1839 1839 node - nodeid of revision; typically node is not specified, and it is
1840 1840 computed by default as hash(text, p1, p2); however, subclasses might
1841 1841 use a different hashing method (and override checkhash() in that case)
1842 1842 flags - the known flags to set on the revision
1843 1843 deltacomputer - an optional deltacomputer instance shared between
1844 1844 multiple calls
1845 1845 """
1846 1846 if link == nullrev:
1847 1847 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1848 1848 % self.indexfile)
1849 1849
1850 1850 if sidedata is None:
1851 1851 sidedata = {}
1852 1852
1853 1853 if flags:
1854 1854 node = node or self.hash(text, p1, p2)
1855 1855
1856 1856 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1857 1857 sidedata=sidedata)
1858 1858
1859 1859 # If the flag processor modifies the revision data, ignore any provided
1860 1860 # cachedelta.
1861 1861 if rawtext != text:
1862 1862 cachedelta = None
1863 1863
1864 1864 if len(rawtext) > _maxentrysize:
1865 1865 raise error.RevlogError(
1866 1866 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1867 1867 % (self.indexfile, len(rawtext)))
1868 1868
1869 1869 node = node or self.hash(rawtext, p1, p2)
1870 1870 if node in self.nodemap:
1871 1871 return node
1872 1872
1873 1873 if validatehash:
1874 1874 self.checkhash(rawtext, node, p1=p1, p2=p2)
1875 1875
1876 1876 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1877 1877 flags, cachedelta=cachedelta,
1878 1878 deltacomputer=deltacomputer)
1879 1879
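# Illustrative sketch (not part of this diff): a plain addrevision() call with
# the documented arguments; `rl`, `tr` (a transaction), `linkrev`, `p1` and
# `p2` are assumed to exist.
newnode = rl.addrevision(b'file content\n', tr, linkrev, p1, p2)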
1880 1880 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1881 1881 cachedelta=None, deltacomputer=None):
1882 1882 """add a raw revision with known flags, node and parents
1883 1883 useful when reusing a revision not stored in this revlog (ex: received
1884 1884 over the wire, or read from an external bundle).
1885 1885 """
1886 1886 dfh = None
1887 1887 if not self._inline:
1888 1888 dfh = self._datafp("a+")
1889 1889 ifh = self._indexfp("a+")
1890 1890 try:
1891 1891 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1892 1892 flags, cachedelta, ifh, dfh,
1893 1893 deltacomputer=deltacomputer)
1894 1894 finally:
1895 1895 if dfh:
1896 1896 dfh.close()
1897 1897 ifh.close()
1898 1898
1899 1899 def compress(self, data):
1900 1900 """Generate a possibly-compressed representation of data."""
1901 1901 if not data:
1902 1902 return '', data
1903 1903
1904 1904 compressed = self._compressor.compress(data)
1905 1905
1906 1906 if compressed:
1907 1907 # The revlog compressor added the header in the returned data.
1908 1908 return '', compressed
1909 1909
1910 1910 if data[0:1] == '\0':
1911 1911 return '', data
1912 1912 return 'u', data
1913 1913
1914 1914 def decompress(self, data):
1915 1915 """Decompress a revlog chunk.
1916 1916
1917 1917 The chunk is expected to begin with a header identifying the
1918 1918 format type so it can be routed to an appropriate decompressor.
1919 1919 """
1920 1920 if not data:
1921 1921 return data
1922 1922
1923 1923 # Revlogs are read much more frequently than they are written and many
1924 1924 # chunks only take microseconds to decompress, so performance is
1925 1925 # important here.
1926 1926 #
1927 1927 # We can make a few assumptions about revlogs:
1928 1928 #
1929 1929 # 1) the majority of chunks will be compressed (as opposed to inline
1930 1930 # raw data).
1931 1931 # 2) decompressing *any* data will likely be at least 10x slower than
1932 1932 # returning raw inline data.
1933 1933 # 3) we want to prioritize common and officially supported compression
1934 1934 # engines
1935 1935 #
1936 1936 # It follows that we want to optimize for "decompress compressed data
1937 1937 # when encoded with common and officially supported compression engines"
1938 1938 # case over "raw data" and "data encoded by less common or non-official
1939 1939 # compression engines." That is why we have the inline lookup first
1940 1940 # followed by the compengines lookup.
1941 1941 #
1942 1942 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1943 1943 # compressed chunks. And this matters for changelog and manifest reads.
1944 1944 t = data[0:1]
1945 1945
1946 1946 if t == 'x':
1947 1947 try:
1948 1948 return _zlibdecompress(data)
1949 1949 except zlib.error as e:
1950 1950 raise error.RevlogError(_('revlog decompress error: %s') %
1951 1951 stringutil.forcebytestr(e))
1952 1952 # '\0' is more common than 'u' so it goes first.
1953 1953 elif t == '\0':
1954 1954 return data
1955 1955 elif t == 'u':
1956 1956 return util.buffer(data, 1)
1957 1957
1958 1958 try:
1959 1959 compressor = self._decompressors[t]
1960 1960 except KeyError:
1961 1961 try:
1962 1962 engine = util.compengines.forrevlogheader(t)
1963 1963 compressor = engine.revlogcompressor(self._compengineopts)
1964 1964 self._decompressors[t] = compressor
1965 1965 except KeyError:
1966 1966 raise error.RevlogError(_('unknown compression type %r') % t)
1967 1967
1968 1968 return compressor.decompress(data)
1969 1969
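# Illustrative sketch (not part of this diff): compress()/decompress() round
# trip. The first byte of a stored chunk routes decompression: 'u' means
# uncompressed, '\0' raw data that already starts with NUL, 'x' zlib, and any
# other value is looked up in the compression engines.
header, packed = rl.compress(b'some revision data')   # `rl` assumed
stored = header + packed        # this is what gets written to the revlog
assert rl.decompress(stored) == b'some revision data'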
1970 1970 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1971 1971 cachedelta, ifh, dfh, alwayscache=False,
1972 1972 deltacomputer=None):
1973 1973 """internal function to add revisions to the log
1974 1974
1975 1975 see addrevision for argument descriptions.
1976 1976
1977 1977 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1978 1978
1979 1979 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1980 1980 be used.
1981 1981
1982 1982 invariants:
1983 1983 - rawtext is optional (can be None); if not set, cachedelta must be set.
1984 1984 if both are set, they must correspond to each other.
1985 1985 """
1986 1986 if node == nullid:
1987 1987 raise error.RevlogError(_("%s: attempt to add null revision") %
1988 1988 self.indexfile)
1989 1989 if node == wdirid or node in wdirfilenodeids:
1990 1990 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1991 1991 self.indexfile)
1992 1992
1993 1993 if self._inline:
1994 1994 fh = ifh
1995 1995 else:
1996 1996 fh = dfh
1997 1997
1998 1998 btext = [rawtext]
1999 1999
2000 2000 curr = len(self)
2001 2001 prev = curr - 1
2002 2002 offset = self.end(prev)
2003 2003 p1r, p2r = self.rev(p1), self.rev(p2)
2004 2004
2005 2005 # full versions are inserted when the needed deltas
2006 2006 # become comparable to the uncompressed text
2007 2007 if rawtext is None:
2008 2008 # need rawtext size, before changed by flag processors, which is
2009 2009 # the non-raw size. use revlog explicitly to avoid filelog's extra
2010 2010 # logic that might remove metadata size.
2011 2011 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2012 2012 cachedelta[1])
2013 2013 else:
2014 2014 textlen = len(rawtext)
2015 2015
2016 2016 if deltacomputer is None:
2017 2017 deltacomputer = deltautil.deltacomputer(self)
2018 2018
2019 2019 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2020 2020
2021 2021 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2022 2022
2023 2023 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2024 2024 deltainfo.base, link, p1r, p2r, node)
2025 2025 self.index.append(e)
2026 2026 self.nodemap[node] = curr
2027 2027
2028 2028 # Reset the pure node cache start lookup offset to account for new
2029 2029 # revision.
2030 2030 if self._nodepos is not None:
2031 2031 self._nodepos = curr
2032 2032
2033 2033 entry = self._io.packentry(e, self.node, self.version, curr)
2034 2034 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2035 2035 link, offset)
2036 2036
2037 2037 rawtext = btext[0]
2038 2038
2039 2039 if alwayscache and rawtext is None:
2040 2040 rawtext = deltacomputer.buildtext(revinfo, fh)
2041 2041
2042 2042 if type(rawtext) == bytes: # only accept immutable objects
2043 2043 self._revisioncache = (node, curr, rawtext)
2044 2044 self._chainbasecache[curr] = deltainfo.chainbase
2045 2045 return node
2046 2046
2047 2047 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2048 2048 # Files opened in a+ mode have inconsistent behavior on various
2049 2049 # platforms. Windows requires that a file positioning call be made
2050 2050 # when the file handle transitions between reads and writes. See
2051 2051 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2052 2052 # platforms, Python or the platform itself can be buggy. Some versions
2053 2053 # of Solaris have been observed to not append at the end of the file
2054 2054 # if the file was seeked to before the end. See issue4943 for more.
2055 2055 #
2056 2056 # We work around this issue by inserting a seek() before writing.
2057 2057 # Note: This is likely not necessary on Python 3. However, because
2058 2058 # the file handle is reused for reads and may be seeked there, we need
2059 2059 # to be careful before changing this.
2060 2060 ifh.seek(0, os.SEEK_END)
2061 2061 if dfh:
2062 2062 dfh.seek(0, os.SEEK_END)
2063 2063
2064 2064 curr = len(self) - 1
2065 2065 if not self._inline:
2066 2066 transaction.add(self.datafile, offset)
2067 2067 transaction.add(self.indexfile, curr * len(entry))
2068 2068 if data[0]:
2069 2069 dfh.write(data[0])
2070 2070 dfh.write(data[1])
2071 2071 ifh.write(entry)
2072 2072 else:
2073 2073 offset += curr * self._io.size
2074 2074 transaction.add(self.indexfile, offset, curr)
2075 2075 ifh.write(entry)
2076 2076 ifh.write(data[0])
2077 2077 ifh.write(data[1])
2078 2078 self._enforceinlinesize(transaction, ifh)
2079 2079
2080 2080 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2081 2081 """
2082 2082 add a delta group
2083 2083
2084 2084 given a set of deltas, add them to the revision log. the
2085 2085 first delta is against its parent, which should be in our
2086 2086 log, the rest are against the previous delta.
2087 2087
2088 2088 If ``addrevisioncb`` is defined, it will be called with arguments of
2089 2089 this revlog and the node that was added.
2090 2090 """
2091 2091
2092 2092 if self._writinghandles:
2093 2093 raise error.ProgrammingError('cannot nest addgroup() calls')
2094 2094
2095 2095 nodes = []
2096 2096
2097 2097 r = len(self)
2098 2098 end = 0
2099 2099 if r:
2100 2100 end = self.end(r - 1)
2101 2101 ifh = self._indexfp("a+")
2102 2102 isize = r * self._io.size
2103 2103 if self._inline:
2104 2104 transaction.add(self.indexfile, end + isize, r)
2105 2105 dfh = None
2106 2106 else:
2107 2107 transaction.add(self.indexfile, isize, r)
2108 2108 transaction.add(self.datafile, end)
2109 2109 dfh = self._datafp("a+")
2110 2110 def flush():
2111 2111 if dfh:
2112 2112 dfh.flush()
2113 2113 ifh.flush()
2114 2114
2115 2115 self._writinghandles = (ifh, dfh)
2116 2116
2117 2117 try:
2118 2118 deltacomputer = deltautil.deltacomputer(self)
2119 2119 # loop through our set of deltas
2120 2120 for data in deltas:
2121 2121 node, p1, p2, linknode, deltabase, delta, flags = data
2122 2122 link = linkmapper(linknode)
2123 2123 flags = flags or REVIDX_DEFAULT_FLAGS
2124 2124
2125 2125 nodes.append(node)
2126 2126
2127 2127 if node in self.nodemap:
2128 2128 self._nodeduplicatecallback(transaction, node)
2129 2129 # this can happen if two branches make the same change
2130 2130 continue
2131 2131
2132 2132 for p in (p1, p2):
2133 2133 if p not in self.nodemap:
2134 2134 raise error.LookupError(p, self.indexfile,
2135 2135 _('unknown parent'))
2136 2136
2137 2137 if deltabase not in self.nodemap:
2138 2138 raise error.LookupError(deltabase, self.indexfile,
2139 2139 _('unknown delta base'))
2140 2140
2141 2141 baserev = self.rev(deltabase)
2142 2142
2143 2143 if baserev != nullrev and self.iscensored(baserev):
2144 2144 # if base is censored, delta must be full replacement in a
2145 2145 # single patch operation
2146 2146 hlen = struct.calcsize(">lll")
2147 2147 oldlen = self.rawsize(baserev)
2148 2148 newlen = len(delta) - hlen
2149 2149 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2150 2150 raise error.CensoredBaseError(self.indexfile,
2151 2151 self.node(baserev))
2152 2152
2153 2153 if not flags and self._peek_iscensored(baserev, delta, flush):
2154 2154 flags |= REVIDX_ISCENSORED
2155 2155
2156 2156 # We assume consumers of addrevisioncb will want to retrieve
2157 2157 # the added revision, which will require a call to
2158 2158 # revision(). revision() will fast path if there is a cache
2159 2159 # hit. So, we tell _addrevision() to always cache in this case.
2160 2160 # We're only using addgroup() in the context of changegroup
2161 2161 # generation so the revision data can always be handled as raw
2162 2162 # by the flagprocessor.
2163 2163 self._addrevision(node, None, transaction, link,
2164 2164 p1, p2, flags, (baserev, delta),
2165 2165 ifh, dfh,
2166 2166 alwayscache=bool(addrevisioncb),
2167 2167 deltacomputer=deltacomputer)
2168 2168
2169 2169 if addrevisioncb:
2170 2170 addrevisioncb(self, node)
2171 2171
2172 2172 if not dfh and not self._inline:
2173 2173 # addrevision switched from inline to conventional
2174 2174 # reopen the index
2175 2175 ifh.close()
2176 2176 dfh = self._datafp("a+")
2177 2177 ifh = self._indexfp("a+")
2178 2178 self._writinghandles = (ifh, dfh)
2179 2179 finally:
2180 2180 self._writinghandles = None
2181 2181
2182 2182 if dfh:
2183 2183 dfh.close()
2184 2184 ifh.close()
2185 2185
2186 2186 return nodes
2187 2187
2188 2188 def iscensored(self, rev):
2189 2189 """Check if a file revision is censored."""
2190 2190 if not self._censorable:
2191 2191 return False
2192 2192
2193 2193 return self.flags(rev) & REVIDX_ISCENSORED
2194 2194
2195 2195 def _peek_iscensored(self, baserev, delta, flush):
2196 2196 """Quickly check if a delta produces a censored revision."""
2197 2197 if not self._censorable:
2198 2198 return False
2199 2199
2200 2200 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2201 2201
2202 2202 def getstrippoint(self, minlink):
2203 2203 """find the minimum rev that must be stripped to strip the linkrev
2204 2204
2205 2205 Returns a tuple containing the minimum rev and a set of all revs that
2206 2206 have linkrevs that will be broken by this strip.
2207 2207 """
2208 2208 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2209 2209 self.headrevs(),
2210 2210 self.linkrev, self.parentrevs)
2211 2211
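# Illustrative sketch (not part of this diff): getstrippoint() feeding strip();
# `rl`, `minlink` and `tr` (a transaction) are assumed to exist.
striprev, broken = rl.getstrippoint(minlink)   # revs whose linkrevs would break
rl.strip(minlink, tr)                          # truncates from striprev onwards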
2212 2212 def strip(self, minlink, transaction):
2213 2213 """truncate the revlog on the first revision with a linkrev >= minlink
2214 2214
2215 2215 This function is called when we're stripping revision minlink and
2216 2216 its descendants from the repository.
2217 2217
2218 2218 We have to remove all revisions with linkrev >= minlink, because
2219 2219 the equivalent changelog revisions will be renumbered after the
2220 2220 strip.
2221 2221
2222 2222 So we truncate the revlog on the first of these revisions, and
2223 2223 trust that the caller has saved the revisions that shouldn't be
2224 2224 removed and that it'll re-add them after this truncation.
2225 2225 """
2226 2226 if len(self) == 0:
2227 2227 return
2228 2228
2229 2229 rev, _ = self.getstrippoint(minlink)
2230 2230 if rev == len(self):
2231 2231 return
2232 2232
2233 2233 # first truncate the files on disk
2234 2234 end = self.start(rev)
2235 2235 if not self._inline:
2236 2236 transaction.add(self.datafile, end)
2237 2237 end = rev * self._io.size
2238 2238 else:
2239 2239 end += rev * self._io.size
2240 2240
2241 2241 transaction.add(self.indexfile, end)
2242 2242
2243 2243 # then reset internal state in memory to forget those revisions
2244 2244 self._revisioncache = None
2245 2245 self._chaininfocache = {}
2246 2246 self._chunkclear()
2247 2247 for x in pycompat.xrange(rev, len(self)):
2248 2248 del self.nodemap[self.node(x)]
2249 2249
2250 2250 del self.index[rev:-1]
2251 2251 self._nodepos = None
2252 2252
2253 2253 def checksize(self):
2254 2254 """Check size of index and data files
2255 2255
2256 2256 return a (dd, di) tuple.
2257 2257 - dd: extra bytes for the "data" file
2258 2258 - di: extra bytes for the "index" file
2259 2259
2260 2260 A healthy revlog will return (0, 0).
2261 2261 """
2262 2262 expected = 0
2263 2263 if len(self):
2264 2264 expected = max(0, self.end(len(self) - 1))
2265 2265
2266 2266 try:
2267 2267 with self._datafp() as f:
2268 2268 f.seek(0, io.SEEK_END)
2269 2269 actual = f.tell()
2270 2270 dd = actual - expected
2271 2271 except IOError as inst:
2272 2272 if inst.errno != errno.ENOENT:
2273 2273 raise
2274 2274 dd = 0
2275 2275
2276 2276 try:
2277 2277 f = self.opener(self.indexfile)
2278 2278 f.seek(0, io.SEEK_END)
2279 2279 actual = f.tell()
2280 2280 f.close()
2281 2281 s = self._io.size
2282 2282 i = max(0, actual // s)
2283 2283 di = actual - (i * s)
2284 2284 if self._inline:
2285 2285 databytes = 0
2286 2286 for r in self:
2287 2287 databytes += max(0, self.length(r))
2288 2288 dd = 0
2289 2289 di = actual - len(self) * s - databytes
2290 2290 except IOError as inst:
2291 2291 if inst.errno != errno.ENOENT:
2292 2292 raise
2293 2293 di = 0
2294 2294
2295 2295 return (dd, di)
2296 2296
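# Illustrative sketch (not part of this diff): interpreting checksize(); a
# healthy revlog reports no stray bytes in either file.
dd, di = rl.checksize()   # `rl` assumed
assert (dd, di) == (0, 0), 'data/index off by %d/%d bytes' % (dd, di)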
2297 2297 def files(self):
2298 2298 res = [self.indexfile]
2299 2299 if not self._inline:
2300 2300 res.append(self.datafile)
2301 2301 return res
2302 2302
2303 2303 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2304 2304 assumehaveparentrevisions=False,
2305 2305 deltamode=repository.CG_DELTAMODE_STD):
2306 2306 if nodesorder not in ('nodes', 'storage', 'linear', None):
2307 2307 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2308 2308 nodesorder)
2309 2309
2310 2310 if nodesorder is None and not self._generaldelta:
2311 2311 nodesorder = 'storage'
2312 2312
2313 2313 if (not self._storedeltachains and
2314 2314 deltamode != repository.CG_DELTAMODE_PREV):
2315 2315 deltamode = repository.CG_DELTAMODE_FULL
2316 2316
2317 2317 return storageutil.emitrevisions(
2318 2318 self, nodes, nodesorder, revlogrevisiondelta,
2319 2319 deltaparentfn=self.deltaparent,
2320 2320 candeltafn=self.candelta,
2321 2321 rawsizefn=self.rawsize,
2322 2322 revdifffn=self.revdiff,
2323 2323 flagsfn=self.flags,
2324 2324 deltamode=deltamode,
2325 2325 revisiondata=revisiondata,
2326 2326 assumehaveparentrevisions=assumehaveparentrevisions)
2327 2327
2328 2328 DELTAREUSEALWAYS = 'always'
2329 2329 DELTAREUSESAMEREVS = 'samerevs'
2330 2330 DELTAREUSENEVER = 'never'
2331 2331
2332 2332 DELTAREUSEFULLADD = 'fulladd'
2333 2333
2334 2334 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2335 2335
2336 2336 def clone(self, tr, destrevlog, addrevisioncb=None,
2337 2337 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2338 2338 """Copy this revlog to another, possibly with format changes.
2339 2339
2340 2340 The destination revlog will contain the same revisions and nodes.
2341 2341 However, it may not be bit-for-bit identical due to e.g. delta encoding
2342 2342 differences.
2343 2343
2344 2344 The ``deltareuse`` argument controls how deltas from the existing revlog
2345 2345 are preserved in the destination revlog. The argument can have the
2346 2346 following values:
2347 2347
2348 2348 DELTAREUSEALWAYS
2349 2349 Deltas will always be reused (if possible), even if the destination
2350 2350 revlog would not select the same revisions for the delta. This is the
2351 2351 fastest mode of operation.
2352 2352 DELTAREUSESAMEREVS
2353 2353 Deltas will be reused if the destination revlog would pick the same
2354 2354 revisions for the delta. This mode strikes a balance between speed
2355 2355 and optimization.
2356 2356 DELTAREUSENEVER
2357 2357 Deltas will never be reused. This is the slowest mode of execution.
2358 2358 This mode can be used to recompute deltas (e.g. if the diff/delta
2359 2359 algorithm changes).
2360 2360 DELTAREUSEFULLADD
2361 2361 Revisions will be re-added as if they were new content. This is
2362 2362 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2363 2363 e.g. large file detection and handling.
2364 2364
2365 2365 Delta computation can be slow, so the choice of delta reuse policy can
2366 2366 significantly affect run time.
2367 2367
2368 2368 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2369 2369 two extremes. Deltas will be reused if they are appropriate. But if a
2370 2370 better delta base could be chosen, the delta will be recomputed. This means if you
2371 2371 are converting a non-generaldelta revlog to a generaldelta revlog,
2372 2372 deltas will be recomputed if the delta's parent isn't a parent of the
2373 2373 revision.
2374 2374
2375 2375 In addition to the delta policy, the ``forcedeltabothparents``
2376 2376 argument controls whether to force computing deltas against both parents
2377 2377 for merges. If None, the destination revlog's existing setting is kept.
2378 2378 """
2379 2379 if deltareuse not in self.DELTAREUSEALL:
2380 2380 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2381 2381
2382 2382 if len(destrevlog):
2383 2383 raise ValueError(_('destination revlog is not empty'))
2384 2384
2385 2385 if getattr(self, 'filteredrevs', None):
2386 2386 raise ValueError(_('source revlog has filtered revisions'))
2387 2387 if getattr(destrevlog, 'filteredrevs', None):
2388 2388 raise ValueError(_('destination revlog has filtered revisions'))
2389 2389
2390 2390 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2391 2391 # if possible.
2392 2392 oldlazydelta = destrevlog._lazydelta
2393 2393 oldlazydeltabase = destrevlog._lazydeltabase
2394 2394 oldamd = destrevlog._deltabothparents
2395 2395
2396 2396 try:
2397 2397 if deltareuse == self.DELTAREUSEALWAYS:
2398 2398 destrevlog._lazydeltabase = True
2399 2399 destrevlog._lazydelta = True
2400 2400 elif deltareuse == self.DELTAREUSESAMEREVS:
2401 2401 destrevlog._lazydeltabase = False
2402 2402 destrevlog._lazydelta = True
2403 2403 elif deltareuse == self.DELTAREUSENEVER:
2404 2404 destrevlog._lazydeltabase = False
2405 2405 destrevlog._lazydelta = False
2406 2406
2407 2407 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2408 2408
2409 2409 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2410 2410 forcedeltabothparents)
2411 2411
2412 2412 finally:
2413 2413 destrevlog._lazydelta = oldlazydelta
2414 2414 destrevlog._lazydeltabase = oldlazydeltabase
2415 2415 destrevlog._deltabothparents = oldamd
2416 2416
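# Illustrative sketch (not part of this diff): forcing every delta to be
# recomputed while copying a revlog; `src`, `dst` and `tr` are assumed.
src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)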
2417 2417 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2418 2418 forcedeltabothparents):
2419 2419 """perform the core duty of `revlog.clone` after parameter processing"""
2420 2420 deltacomputer = deltautil.deltacomputer(destrevlog)
2421 2421 index = self.index
2422 2422 for rev in self:
2423 2423 entry = index[rev]
2424 2424
2425 2425 # Some classes override linkrev to take filtered revs into
2426 2426 # account. Use raw entry from index.
2427 2427 flags = entry[0] & 0xffff
2428 2428 linkrev = entry[4]
2429 2429 p1 = index[entry[5]][7]
2430 2430 p2 = index[entry[6]][7]
2431 2431 node = entry[7]
2432 2432
2433 2433 # (Possibly) reuse the delta from the revlog if allowed and
2434 2434 # the revlog chunk is a delta.
2435 2435 cachedelta = None
2436 2436 rawtext = None
2437 2437 if deltareuse == self.DELTAREUSEFULLADD:
2438 2438 text = self.revision(rev)
2439 2439 destrevlog.addrevision(text, tr, linkrev, p1, p2,
2440 2440 cachedelta=cachedelta,
2441 2441 node=node, flags=flags,
2442 2442 deltacomputer=deltacomputer)
2443 2443 else:
2444 2444 if destrevlog._lazydelta:
2445 2445 dp = self.deltaparent(rev)
2446 2446 if dp != nullrev:
2447 2447 cachedelta = (dp, bytes(self._chunk(rev)))
2448 2448
2449 2449 if not cachedelta:
2450 2450 rawtext = self.rawdata(rev)
2451 2451
2452 2452 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2453 2453 checkambig=False)
2454 2454 dfh = None
2455 2455 if not destrevlog._inline:
2456 2456 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2457 2457 try:
2458 2458 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2459 2459 p2, flags, cachedelta, ifh, dfh,
2460 2460 deltacomputer=deltacomputer)
2461 2461 finally:
2462 2462 if dfh:
2463 2463 dfh.close()
2464 2464 ifh.close()
2465 2465
2466 2466 if addrevisioncb:
2467 2467 addrevisioncb(self, rev, node)
2468 2468
2469 2469 def censorrevision(self, tr, censornode, tombstone=b''):
2470 2470 if (self.version & 0xFFFF) == REVLOGV0:
2471 2471 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2472 2472 self.version)
2473 2473
2474 2474 censorrev = self.rev(censornode)
2475 2475 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2476 2476
2477 2477 if len(tombstone) > self.rawsize(censorrev):
2478 2478 raise error.Abort(_('censor tombstone must be no longer than '
2479 2479 'censored data'))
2480 2480
2481 2481 # Rewriting the revlog in place is hard. Our strategy for censoring is
2482 2482 # to create a new revlog, copy all revisions to it, then replace the
2483 2483 # revlogs on transaction close.
2484 2484
2485 2485 newindexfile = self.indexfile + b'.tmpcensored'
2486 2486 newdatafile = self.datafile + b'.tmpcensored'
2487 2487
2488 2488 # This is a bit dangerous. We could easily have a mismatch of state.
2489 2489 newrl = revlog(self.opener, newindexfile, newdatafile,
2490 2490 censorable=True)
2491 2491 newrl.version = self.version
2492 2492 newrl._generaldelta = self._generaldelta
2493 2493 newrl._io = self._io
2494 2494
2495 2495 for rev in self.revs():
2496 2496 node = self.node(rev)
2497 2497 p1, p2 = self.parents(node)
2498 2498
2499 2499 if rev == censorrev:
2500 2500 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2501 2501 p1, p2, censornode, REVIDX_ISCENSORED)
2502 2502
2503 2503 if newrl.deltaparent(rev) != nullrev:
2504 2504 raise error.Abort(_('censored revision stored as delta; '
2505 2505 'cannot censor'),
2506 2506 hint=_('censoring of revlogs is not '
2507 2507 'fully implemented; please report '
2508 2508 'this bug'))
2509 2509 continue
2510 2510
2511 2511 if self.iscensored(rev):
2512 2512 if self.deltaparent(rev) != nullrev:
2513 2513 raise error.Abort(_('cannot censor due to censored '
2514 2514 'revision having delta stored'))
2515 2515 rawtext = self._chunk(rev)
2516 2516 else:
2517 2517 rawtext = self.rawdata(rev)
2518 2518
2519 2519 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2520 2520 self.flags(rev))
2521 2521
2522 2522 tr.addbackup(self.indexfile, location='store')
2523 2523 if not self._inline:
2524 2524 tr.addbackup(self.datafile, location='store')
2525 2525
2526 2526 self.opener.rename(newrl.indexfile, self.indexfile)
2527 2527 if not self._inline:
2528 2528 self.opener.rename(newrl.datafile, self.datafile)
2529 2529
2530 2530 self.clearcaches()
2531 2531 self._loadindex()
2532 2532
2533 2533 def verifyintegrity(self, state):
2534 2534 """Verifies the integrity of the revlog.
2535 2535
2536 2536 Yields ``revlogproblem`` instances describing problems that are
2537 2537 found.
2538 2538 """
2539 2539 dd, di = self.checksize()
2540 2540 if dd:
2541 2541 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2542 2542 if di:
2543 2543 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2544 2544
2545 2545 version = self.version & 0xFFFF
2546 2546
2547 2547 # The verifier tells us what version revlog we should be.
2548 2548 if version != state['expectedversion']:
2549 2549 yield revlogproblem(
2550 2550 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2551 2551 (self.indexfile, version, state['expectedversion']))
2552 2552
2553 2553 state['skipread'] = set()
2554 2554
2555 2555 for rev in self:
2556 2556 node = self.node(rev)
2557 2557
2558 2558 # Verify contents. 4 cases to care about:
2559 2559 #
2560 2560 # common: the most common case
2561 2561 # rename: with a rename
2562 2562 # meta: file content starts with b'\1\n', the metadata
2563 2563 # header defined in filelog.py, but without a rename
2564 2564 # ext: content stored externally
2565 2565 #
2566 2566 # More formally, their differences are shown below:
2567 2567 #
2568 2568 # | common | rename | meta | ext
2569 2569 # -------------------------------------------------------
2570 2570 # flags() | 0 | 0 | 0 | not 0
2571 2571 # renamed() | False | True | False | ?
2572 2572 # rawtext[0:2]=='\1\n'| False | True | True | ?
2573 2573 #
2574 2574 # "rawtext" means the raw text stored in revlog data, which
2575 2575 # could be retrieved by "rawdata(rev)". "text"
2576 2576 # mentioned below is "revision(rev)".
2577 2577 #
2578 2578 # There are 3 different lengths stored physically:
2579 2579 # 1. L1: rawsize, stored in revlog index
2580 2580 # 2. L2: len(rawtext), stored in revlog data
2581 2581 # 3. L3: len(text), stored in revlog data if flags==0, or
2582 2582 # possibly somewhere else if flags!=0
2583 2583 #
2584 2584 # L1 should be equal to L2. L3 could be different from them.
2585 2585 # "text" may or may not affect commit hash depending on flag
2586 2586 # processors (see flagutil.addflagprocessor).
2587 2587 #
2588 2588 # | common | rename | meta | ext
2589 2589 # -------------------------------------------------
2590 2590 # rawsize() | L1 | L1 | L1 | L1
2591 2591 # size() | L1 | L2-LM | L1(*) | L1 (?)
2592 2592 # len(rawtext) | L2 | L2 | L2 | L2
2593 2593 # len(text) | L2 | L2 | L2 | L3
2594 2594 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2595 2595 #
2596 2596 # LM: length of metadata, depending on rawtext
2597 2597 # (*): not ideal, see comment in filelog.size
2598 2598 # (?): could be "- len(meta)" if the resolved content has
2599 2599 # rename metadata
2600 2600 #
2601 2601 # Checks needed to be done:
2602 2602 # 1. length check: L1 == L2, in all cases.
2603 2603 # 2. hash check: depending on flag processor, we may need to
2604 2604 # use either "text" (external), or "rawtext" (in revlog).
2605 2605
2606 2606 try:
2607 2607 skipflags = state.get('skipflags', 0)
2608 2608 if skipflags:
2609 2609 skipflags &= self.flags(rev)
2610 2610
2611 2611 if skipflags:
2612 2612 state['skipread'].add(node)
2613 2613 else:
2614 2614 # Side-effect: read content and verify hash.
2615 2615 self.revision(node)
2616 2616
2617 2617 l1 = self.rawsize(rev)
2618 2618 l2 = len(self.rawdata(node))
2619 2619
2620 2620 if l1 != l2:
2621 2621 yield revlogproblem(
2622 2622 error=_('unpacked size is %d, %d expected') % (l2, l1),
2623 2623 node=node)
2624 2624
2625 2625 except error.CensoredNodeError:
2626 2626 if state['erroroncensored']:
2627 2627 yield revlogproblem(error=_('censored file data'),
2628 2628 node=node)
2629 2629 state['skipread'].add(node)
2630 2630 except Exception as e:
2631 2631 yield revlogproblem(
2632 2632 error=_('unpacking %s: %s') % (short(node),
2633 2633 stringutil.forcebytestr(e)),
2634 2634 node=node)
2635 2635 state['skipread'].add(node)
2636 2636
2637 2637 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2638 2638 revisionscount=False, trackedsize=False,
2639 2639 storedsize=False):
2640 2640 d = {}
2641 2641
2642 2642 if exclusivefiles:
2643 2643 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2644 2644 if not self._inline:
2645 2645 d['exclusivefiles'].append((self.opener, self.datafile))
2646 2646
2647 2647 if sharedfiles:
2648 2648 d['sharedfiles'] = []
2649 2649
2650 2650 if revisionscount:
2651 2651 d['revisionscount'] = len(self)
2652 2652
2653 2653 if trackedsize:
2654 2654 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2655 2655
2656 2656 if storedsize:
2657 2657 d['storedsize'] = sum(self.opener.stat(path).st_size
2658 2658 for path in self.files())
2659 2659
2660 2660 return d
@@ -1,230 +1,231 b''
1 1 # statichttprepo.py - simple http repository class for mercurial
2 2 #
3 3 # This provides read-only repo access to repositories exported via static http
4 4 #
5 5 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import errno
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 branchmap,
17 17 changelog,
18 18 error,
19 19 localrepo,
20 20 manifest,
21 21 namespaces,
22 22 pathutil,
23 23 pycompat,
24 24 url,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28
29 29 urlerr = util.urlerr
30 30 urlreq = util.urlreq
31 31
32 32 class httprangereader(object):
33 33 def __init__(self, url, opener):
34 34 # we assume opener has HTTPRangeHandler
35 35 self.url = url
36 36 self.pos = 0
37 37 self.opener = opener
38 38 self.name = url
39 39
40 40 def __enter__(self):
41 41 return self
42 42
43 43 def __exit__(self, exc_type, exc_value, traceback):
44 44 self.close()
45 45
46 46 def seek(self, pos):
47 47 self.pos = pos
48 48 def read(self, bytes=None):
49 49 req = urlreq.request(pycompat.strurl(self.url))
50 50 end = ''
51 51 if bytes:
52 52 end = self.pos + bytes - 1
53 53 if self.pos or end:
54 54 req.add_header(r'Range', r'bytes=%d-%s' % (self.pos, end))
55 55
56 56 try:
57 57 f = self.opener.open(req)
58 58 data = f.read()
59 59 code = f.code
60 60 except urlerr.httperror as inst:
61 61 num = inst.code == 404 and errno.ENOENT or None
62 62 raise IOError(num, inst)
63 63 except urlerr.urlerror as inst:
64 64 raise IOError(None, inst.reason)
65 65
66 66 if code == 200:
67 67 # HTTPRangeHandler does nothing if remote does not support
68 68 # Range headers and returns the full entity. Let's slice it.
69 69 if bytes:
70 70 data = data[self.pos:self.pos + bytes]
71 71 else:
72 72 data = data[self.pos:]
73 73 elif bytes:
74 74 data = data[:bytes]
75 75 self.pos += len(data)
76 76 return data
77 77 def readlines(self):
78 78 return self.read().splitlines(True)
79 79 def __iter__(self):
80 80 return iter(self.readlines())
81 81 def close(self):
82 82 pass
83 83
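# Illustrative sketch (not part of this diff): the byte range read() derives
# for a reader that did seek(16) followed by read(64).
pos, nbytes = 16, 64
end = pos + nbytes - 1                       # 79
range_header = r'bytes=%d-%s' % (pos, end)   # 'bytes=16-79'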
84 84 # _RangeError and _HTTPRangeHandler were originally in byterange.py,
85 85 # which was itself extracted from urlgrabber. See the last version of
86 86 # byterange.py from history if you need more information.
87 87 class _RangeError(IOError):
88 88 """Error raised when an unsatisfiable range is requested."""
89 89
90 90 class _HTTPRangeHandler(urlreq.basehandler):
91 91 """Handler that enables HTTP Range headers.
92 92
93 93 This is extremely simple. The Range header is an HTTP feature to
94 94 begin with so all this class does is tell urllib2 that the
95 95 "206 Partial Content" response from the HTTP server is what we
96 96 expected.
97 97 """
98 98
99 99 def http_error_206(self, req, fp, code, msg, hdrs):
100 100 # 206 Partial Content Response
101 101 r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
102 102 r.code = code
103 103 r.msg = msg
104 104 return r
105 105
106 106 def http_error_416(self, req, fp, code, msg, hdrs):
107 107 # HTTP's Range Not Satisfiable error
108 108 raise _RangeError('Requested Range Not Satisfiable')
109 109
110 110 def build_opener(ui, authinfo):
111 111 # urllib cannot handle URLs with embedded user or passwd
112 112 urlopener = url.opener(ui, authinfo)
113 113 urlopener.add_handler(_HTTPRangeHandler())
114 114
115 115 class statichttpvfs(vfsmod.abstractvfs):
116 116 def __init__(self, base):
117 117 self.base = base
118 self.options = {}
118 119
119 120 def __call__(self, path, mode='r', *args, **kw):
120 121 if mode not in ('r', 'rb'):
121 122 raise IOError('Permission denied')
122 123 f = "/".join((self.base, urlreq.quote(path)))
123 124 return httprangereader(f, urlopener)
124 125
125 126 def join(self, path):
126 127 if path:
127 128 return pathutil.join(self.base, path)
128 129 else:
129 130 return self.base
130 131
131 132 return statichttpvfs
132 133
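# Illustrative sketch (not part of this diff): statichttpvfs is read-only; a
# write-mode open is refused while reads go through httprangereader. The URL,
# `ui` and `authinfo` below are assumed for the example.
vfs = build_opener(ui, authinfo)('http://example.com/repo/.hg')
fp = vfs('00changelog.i')          # returns an httprangereader
# vfs('00changelog.i', mode='w')   # would raise IOError('Permission denied')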
133 134 class statichttppeer(localrepo.localpeer):
134 135 def local(self):
135 136 return None
136 137 def canpush(self):
137 138 return False
138 139
139 140 class statichttprepository(localrepo.localrepository,
140 141 localrepo.revlogfilestorage):
141 142 supported = localrepo.localrepository._basesupported
142 143
143 144 def __init__(self, ui, path):
144 145 self._url = path
145 146 self.ui = ui
146 147
147 148 self.root = path
148 149 u = util.url(path.rstrip('/') + "/.hg")
149 150 self.path, authinfo = u.authinfo()
150 151
151 152 vfsclass = build_opener(ui, authinfo)
152 153 self.vfs = vfsclass(self.path)
153 154 self.cachevfs = vfsclass(self.vfs.join('cache'))
154 155 self._phasedefaults = []
155 156
156 157 self.names = namespaces.namespaces()
157 158 self.filtername = None
158 159 self._extrafilterid = None
159 160
160 161 try:
161 162 requirements = set(self.vfs.read(b'requires').splitlines())
162 163 except IOError as inst:
163 164 if inst.errno != errno.ENOENT:
164 165 raise
165 166 requirements = set()
166 167
167 168 # check if it is a non-empty old-style repository
168 169 try:
169 170 fp = self.vfs("00changelog.i")
170 171 fp.read(1)
171 172 fp.close()
172 173 except IOError as inst:
173 174 if inst.errno != errno.ENOENT:
174 175 raise
175 176 # we do not care about empty old-style repositories here
176 177 msg = _("'%s' does not appear to be an hg repository") % path
177 178 raise error.RepoError(msg)
178 179
179 180 supportedrequirements = localrepo.gathersupportedrequirements(ui)
180 181 localrepo.ensurerequirementsrecognized(requirements,
181 182 supportedrequirements)
182 183 localrepo.ensurerequirementscompatible(ui, requirements)
183 184
184 185 # setup store
185 186 self.store = localrepo.makestore(requirements, self.path, vfsclass)
186 187 self.spath = self.store.path
187 188 self.svfs = self.store.opener
188 189 self.sjoin = self.store.join
189 190 self._filecache = {}
190 191 self.requirements = requirements
191 192
192 193 rootmanifest = manifest.manifestrevlog(self.svfs)
193 194 self.manifestlog = manifest.manifestlog(self.svfs, self, rootmanifest,
194 195 self.narrowmatch())
195 196 self.changelog = changelog.changelog(self.svfs)
196 197 self._tags = None
197 198 self.nodetagscache = None
198 199 self._branchcaches = branchmap.BranchMapCache()
199 200 self._revbranchcache = None
200 201 self.encodepats = None
201 202 self.decodepats = None
202 203 self._transref = None
203 204
204 205 def _restrictcapabilities(self, caps):
205 206 caps = super(statichttprepository, self)._restrictcapabilities(caps)
206 207 return caps.difference(["pushkey"])
207 208
208 209 def url(self):
209 210 return self._url
210 211
211 212 def local(self):
212 213 return False
213 214
214 215 def peer(self):
215 216 return statichttppeer(self)
216 217
217 218 def wlock(self, wait=True):
218 219 raise error.LockUnavailable(0, _('lock not available'), 'lock',
219 220 _('cannot lock static-http repository'))
220 221
221 222 def lock(self, wait=True):
222 223 raise error.Abort(_('cannot lock static-http repository'))
223 224
224 225 def _writecaches(self):
225 226 pass # statichttprepository are read only
226 227
227 228 def instance(ui, path, create, intents=None, createopts=None):
228 229 if create:
229 230 raise error.Abort(_('cannot create new static-http repository'))
230 231 return statichttprepository(ui, path[7:])
@@ -1,674 +1,675 b''
1 1 # vfs.py - Mercurial 'vfs' classes
2 2 #
3 3 # Copyright Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 import contextlib
10 10 import errno
11 11 import os
12 12 import shutil
13 13 import stat
14 14 import threading
15 15
16 16 from .i18n import _
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 pathutil,
21 21 pycompat,
22 22 util,
23 23 )
24 24
25 25 def _avoidambig(path, oldstat):
26 26 """Avoid file stat ambiguity forcibly
27 27
28 28 This function causes copying ``path`` file, if it is owned by
29 29 another (see issue5418 and issue5584 for detail).
30 30 """
31 31 def checkandavoid():
32 32 newstat = util.filestat.frompath(path)
33 33 # return whether file stat ambiguity is (already) avoided
34 34 return (not newstat.isambig(oldstat) or
35 35 newstat.avoidambig(path, oldstat))
36 36 if not checkandavoid():
37 37 # simply copy to change owner of path to get privilege to
38 38 # advance mtime (see issue5418)
39 39 util.rename(util.mktempcopy(path), path)
40 40 checkandavoid()
41 41
42 42 class abstractvfs(object):
43 43 """Abstract base class; cannot be instantiated"""
44 44
45 45 def __init__(self, *args, **kwargs):
46 46 '''Prevent instantiation; don't call this from subclasses.'''
47 47 raise NotImplementedError('attempted instantiating ' + str(type(self)))
48 48
49 49 def _auditpath(self, path, mode):
50 50 raise NotImplementedError
51 51
52 52 def tryread(self, path):
53 53 '''gracefully return an empty string for missing files'''
54 54 try:
55 55 return self.read(path)
56 56 except IOError as inst:
57 57 if inst.errno != errno.ENOENT:
58 58 raise
59 59 return ""
60 60
61 61 def tryreadlines(self, path, mode='rb'):
62 62 '''gracefully return an empty array for missing files'''
63 63 try:
64 64 return self.readlines(path, mode=mode)
65 65 except IOError as inst:
66 66 if inst.errno != errno.ENOENT:
67 67 raise
68 68 return []
69 69
70 70 @util.propertycache
71 71 def open(self):
72 72 '''Open ``path`` file, which is relative to vfs root.
73 73
74 74 Newly created directories are marked as "not to be indexed by
75 75 the content indexing service", if ``notindexed`` is specified
76 76 for "write" mode access.
77 77 '''
78 78 return self.__call__
79 79
80 80 def read(self, path):
81 81 with self(path, 'rb') as fp:
82 82 return fp.read()
83 83
84 84 def readlines(self, path, mode='rb'):
85 85 with self(path, mode=mode) as fp:
86 86 return fp.readlines()
87 87
88 88 def write(self, path, data, backgroundclose=False, **kwargs):
89 89 with self(path, 'wb', backgroundclose=backgroundclose, **kwargs) as fp:
90 90 return fp.write(data)
91 91
92 92 def writelines(self, path, data, mode='wb', notindexed=False):
93 93 with self(path, mode=mode, notindexed=notindexed) as fp:
94 94 return fp.writelines(data)
95 95
96 96 def append(self, path, data):
97 97 with self(path, 'ab') as fp:
98 98 return fp.write(data)
99 99
100 100 def basename(self, path):
101 101 """return base element of a path (as os.path.basename would do)
102 102
103 103 This exists to allow handling of strange encoding if needed."""
104 104 return os.path.basename(path)
105 105
106 106 def chmod(self, path, mode):
107 107 return os.chmod(self.join(path), mode)
108 108
109 109 def dirname(self, path):
110 110 """return dirname element of a path (as os.path.dirname would do)
111 111
112 112 This exists to allow handling of strange encoding if needed."""
113 113 return os.path.dirname(path)
114 114
115 115 def exists(self, path=None):
116 116 return os.path.exists(self.join(path))
117 117
118 118 def fstat(self, fp):
119 119 return util.fstat(fp)
120 120
121 121 def isdir(self, path=None):
122 122 return os.path.isdir(self.join(path))
123 123
124 124 def isfile(self, path=None):
125 125 return os.path.isfile(self.join(path))
126 126
127 127 def islink(self, path=None):
128 128 return os.path.islink(self.join(path))
129 129
130 130 def isfileorlink(self, path=None):
131 131 '''return whether path is a regular file or a symlink
132 132
133 133 Unlike isfile, this doesn't follow symlinks.'''
134 134 try:
135 135 st = self.lstat(path)
136 136 except OSError:
137 137 return False
138 138 mode = st.st_mode
139 139 return stat.S_ISREG(mode) or stat.S_ISLNK(mode)
140 140
141 141 def reljoin(self, *paths):
142 142 """join various elements of a path together (as os.path.join would do)
143 143
144 144 The vfs base is not injected so that paths stay relative. This exists
145 145 to allow handling of strange encoding if needed."""
146 146 return os.path.join(*paths)
147 147
148 148 def split(self, path):
149 149 """split top-most element of a path (as os.path.split would do)
150 150
151 151 This exists to allow handling of strange encoding if needed."""
152 152 return os.path.split(path)
153 153
154 154 def lexists(self, path=None):
155 155 return os.path.lexists(self.join(path))
156 156
157 157 def lstat(self, path=None):
158 158 return os.lstat(self.join(path))
159 159
160 160 def listdir(self, path=None):
161 161 return os.listdir(self.join(path))
162 162
163 163 def makedir(self, path=None, notindexed=True):
164 164 return util.makedir(self.join(path), notindexed)
165 165
166 166 def makedirs(self, path=None, mode=None):
167 167 return util.makedirs(self.join(path), mode)
168 168
169 169 def makelock(self, info, path):
170 170 return util.makelock(info, self.join(path))
171 171
172 172 def mkdir(self, path=None):
173 173 return os.mkdir(self.join(path))
174 174
175 175 def mkstemp(self, suffix='', prefix='tmp', dir=None):
176 176 fd, name = pycompat.mkstemp(suffix=suffix, prefix=prefix,
177 177 dir=self.join(dir))
178 178 dname, fname = util.split(name)
179 179 if dir:
180 180 return fd, os.path.join(dir, fname)
181 181 else:
182 182 return fd, fname
183 183
184 184 def readdir(self, path=None, stat=None, skip=None):
185 185 return util.listdir(self.join(path), stat, skip)
186 186
187 187 def readlock(self, path):
188 188 return util.readlock(self.join(path))
189 189
190 190 def rename(self, src, dst, checkambig=False):
191 191 """Rename from src to dst
192 192
193 193 checkambig argument is used with util.filestat, and is useful
194 194 only if destination file is guarded by any lock
195 195 (e.g. repo.lock or repo.wlock).
196 196
197 197 To avoid file stat ambiguity forcibly, checkambig=True involves
198 198 copying ``src`` file, if it is owned by another. Therefore, use
199 199 checkambig=True only in limited cases (see also issue5418 and
200 200 issue5584 for detail).
201 201 """
202 202 self._auditpath(dst, 'w')
203 203 srcpath = self.join(src)
204 204 dstpath = self.join(dst)
205 205 oldstat = checkambig and util.filestat.frompath(dstpath)
206 206 if oldstat and oldstat.stat:
207 207 ret = util.rename(srcpath, dstpath)
208 208 _avoidambig(dstpath, oldstat)
209 209 return ret
210 210 return util.rename(srcpath, dstpath)
211 211
212 212 def readlink(self, path):
213 213 return util.readlink(self.join(path))
214 214
215 215 def removedirs(self, path=None):
216 216 """Remove a leaf directory and all empty intermediate ones
217 217 """
218 218 return util.removedirs(self.join(path))
219 219
220 220 def rmdir(self, path=None):
221 221 """Remove an empty directory."""
222 222 return os.rmdir(self.join(path))
223 223
224 224 def rmtree(self, path=None, ignore_errors=False, forcibly=False):
225 225 """Remove a directory tree recursively
226 226
227 227 If ``forcibly``, this tries to remove READ-ONLY files, too.
228 228 """
229 229 if forcibly:
230 230 def onerror(function, path, excinfo):
231 231 if function is not os.remove:
232 232 raise
233 233 # read-only files cannot be unlinked under Windows
234 234 s = os.stat(path)
235 235 if (s.st_mode & stat.S_IWRITE) != 0:
236 236 raise
237 237 os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE)
238 238 os.remove(path)
239 239 else:
240 240 onerror = None
241 241 return shutil.rmtree(self.join(path),
242 242 ignore_errors=ignore_errors, onerror=onerror)
243 243
244 244 def setflags(self, path, l, x):
245 245 return util.setflags(self.join(path), l, x)
246 246
247 247 def stat(self, path=None):
248 248 return os.stat(self.join(path))
249 249
250 250 def unlink(self, path=None):
251 251 return util.unlink(self.join(path))
252 252
253 253 def tryunlink(self, path=None):
254 254 """Attempt to remove a file, ignoring missing file errors."""
255 255 util.tryunlink(self.join(path))
256 256
257 257 def unlinkpath(self, path=None, ignoremissing=False, rmdir=True):
258 258 return util.unlinkpath(self.join(path), ignoremissing=ignoremissing,
259 259 rmdir=rmdir)
260 260
261 261 def utime(self, path=None, t=None):
262 262 return os.utime(self.join(path), t)
263 263
264 264 def walk(self, path=None, onerror=None):
265 265 """Yield a (dirpath, dirs, files) tuple for each directory under path
266 266 
267 267 ``dirpath`` is relative to the root of this vfs. This
268 268 uses ``os.sep`` as the path separator, even if you specify a
269 269 POSIX-style ``path``.
270 270
271 271 "The root of this vfs" is represented as empty ``dirpath``.
272 272 """
273 273 root = os.path.normpath(self.join(None))
274 274 # when dirpath == root, dirpath[prefixlen:] becomes empty
275 275 # because len(dirpath) < prefixlen.
276 276 prefixlen = len(pathutil.normasprefix(root))
277 277 for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
278 278 yield (dirpath[prefixlen:], dirs, files)
279 279
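For instance, a minimal editorial sketch (``myvfs`` is an assumed vfs instance rooted at a repository's ``.hg`` directory, not part of this change):

    # illustrative only: dirpaths come back relative to the vfs root
    for dirpath, dirs, files in myvfs.walk(b'store'):
        print(dirpath)   # b'store', b'store/data', ... using os.sep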
280 280 @contextlib.contextmanager
281 281 def backgroundclosing(self, ui, expectedcount=-1):
282 282 """Allow files to be closed asynchronously.
283 283
284 284 When this context manager is active, ``backgroundclose`` can be passed
285 285 to ``__call__``/``open`` to result in the file possibly being closed
286 286 asynchronously, on a background thread.
287 287 """
288 288 # Sharing backgroundfilecloser between threads is complex and using
289 289 # multiple instances puts us at risk of running out of file descriptors,
290 290 # so only allow backgroundfilecloser to be used from the main thread.
291 291 if not isinstance(threading.currentThread(), threading._MainThread):
292 292 yield
293 293 return
294 294 vfs = getattr(self, 'vfs', self)
295 295 if getattr(vfs, '_backgroundfilecloser', None):
296 296 raise error.Abort(
297 297 _('can only have 1 active background file closer'))
298 298
299 299 with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
300 300 try:
301 301 vfs._backgroundfilecloser = bfc
302 302 yield bfc
303 303 finally:
304 304 vfs._backgroundfilecloser = None
305 305
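A rough usage sketch of the pairing described above; ``ui``, ``myvfs`` and the file names are assumed for illustration and are not part of the patch:

    # close many freshly written files on background threads
    with myvfs.backgroundclosing(ui, expectedcount=10000):
        for i in range(10000):
            with myvfs(b'data/%d.i' % i, 'wb', backgroundclose=True) as fp:
                fp.write(b'...')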
306 306 class vfs(abstractvfs):
307 307 '''Operate on files relative to a base directory
308 308
309 309 This class is used to hide the details of COW semantics and
310 310 remote file access from higher level code.
311 311
312 312 'cacheaudited' should be enabled only if (a) vfs object is short-lived, or
313 313 (b) the base directory is managed by hg and considered sort-of append-only.
314 314 See pathutil.pathauditor() for details.
315 315 '''
316 316 def __init__(self, base, audit=True, cacheaudited=False, expandpath=False,
317 317 realpath=False):
318 318 if expandpath:
319 319 base = util.expandpath(base)
320 320 if realpath:
321 321 base = os.path.realpath(base)
322 322 self.base = base
323 323 self._audit = audit
324 324 if audit:
325 325 self.audit = pathutil.pathauditor(self.base, cached=cacheaudited)
326 326 else:
327 327 self.audit = (lambda path, mode=None: True)
328 328 self.createmode = None
329 329 self._trustnlink = None
330 self.options = {}
330 331
331 332 @util.propertycache
332 333 def _cansymlink(self):
333 334 return util.checklink(self.base)
334 335
335 336 @util.propertycache
336 337 def _chmod(self):
337 338 return util.checkexec(self.base)
338 339
339 340 def _fixfilemode(self, name):
340 341 if self.createmode is None or not self._chmod:
341 342 return
342 343 os.chmod(name, self.createmode & 0o666)
343 344
344 345 def _auditpath(self, path, mode):
345 346 if self._audit:
346 347 if os.path.isabs(path) and path.startswith(self.base):
347 348 path = os.path.relpath(path, self.base)
348 349 r = util.checkosfilename(path)
349 350 if r:
350 351 raise error.Abort("%s: %r" % (r, path))
351 352 self.audit(path, mode=mode)
352 353
353 354 def __call__(self, path, mode="r", atomictemp=False, notindexed=False,
354 355 backgroundclose=False, checkambig=False, auditpath=True,
355 356 makeparentdirs=True):
356 357 '''Open ``path`` file, which is relative to vfs root.
357 358
358 359 By default, parent directories are created as needed. Newly created
359 360 directories are marked as "not to be indexed by the content indexing
360 361 service", if ``notindexed`` is specified for "write" mode access.
361 362 Set ``makeparentdirs=False`` to not create directories implicitly.
362 363
363 364 If ``backgroundclose`` is passed, the file may be closed asynchronously.
364 365 It can only be used if the ``self.backgroundclosing()`` context manager
365 366 is active. This should only be specified if the following criteria hold:
366 367
367 368 1. There is a potential for writing thousands of files. Unless you
368 369 are writing thousands of files, the performance benefits of
369 370 asynchronously closing files are not realized.
370 371 2. Files are opened exactly once for the ``backgroundclosing``
371 372 active duration and are therefore free of race conditions between
372 373 closing a file on a background thread and reopening it. (If the
373 374 file were opened multiple times, there could be unflushed data
374 375 because the original file handle hasn't been flushed/closed yet.)
375 376
376 377 The ``checkambig`` argument is passed to atomictempfile (valid
377 378 only for writing), and is useful only if the target file is
378 379 guarded by a lock (e.g. repo.lock or repo.wlock).
379 380
380 381 To forcibly avoid file stat ambiguity, checkambig=True involves
381 382 copying the ``path`` file opened in "append" mode (e.g. for
382 383 truncation), if it is owned by another user. Therefore, use the
383 384 combination of append mode and checkambig=True only in limited
384 385 cases (see also issue5418 and issue5584 for details).
385 386 '''
386 387 if auditpath:
387 388 self._auditpath(path, mode)
388 389 f = self.join(path)
389 390
390 391 if "b" not in mode:
391 392 mode += "b" # for that other OS
392 393
393 394 nlink = -1
394 395 if mode not in ('r', 'rb'):
395 396 dirname, basename = util.split(f)
396 397 # If basename is empty, then the path is malformed because it points
397 398 # to a directory. Let the posixfile() call below raise IOError.
398 399 if basename:
399 400 if atomictemp:
400 401 if makeparentdirs:
401 402 util.makedirs(dirname, self.createmode, notindexed)
402 403 return util.atomictempfile(f, mode, self.createmode,
403 404 checkambig=checkambig)
404 405 try:
405 406 if 'w' in mode:
406 407 util.unlink(f)
407 408 nlink = 0
408 409 else:
409 410 # nlinks() may behave differently for files on Windows
410 411 # shares if the file is open.
411 412 with util.posixfile(f):
412 413 nlink = util.nlinks(f)
413 414 if nlink < 1:
414 415 nlink = 2 # force mktempcopy (issue1922)
415 416 except (OSError, IOError) as e:
416 417 if e.errno != errno.ENOENT:
417 418 raise
418 419 nlink = 0
419 420 if makeparentdirs:
420 421 util.makedirs(dirname, self.createmode, notindexed)
421 422 if nlink > 0:
422 423 if self._trustnlink is None:
423 424 self._trustnlink = nlink > 1 or util.checknlink(f)
424 425 if nlink > 1 or not self._trustnlink:
425 426 util.rename(util.mktempcopy(f), f)
426 427 fp = util.posixfile(f, mode)
427 428 if nlink == 0:
428 429 self._fixfilemode(f)
429 430
430 431 if checkambig:
431 432 if mode in ('r', 'rb'):
432 433 raise error.Abort(_('implementation error: mode %s is not'
433 434 ' valid for checkambig=True') % mode)
434 435 fp = checkambigatclosing(fp)
435 436
436 437 if (backgroundclose and
437 438 isinstance(threading.currentThread(), threading._MainThread)):
438 439 if not self._backgroundfilecloser:
439 440 raise error.Abort(_('backgroundclose can only be used when a '
440 441 'backgroundclosing context manager is active')
441 442 )
442 443
443 444 fp = delayclosedfile(fp, self._backgroundfilecloser)
444 445
445 446 return fp
446 447
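Two hedged examples of the write modes documented above (``myvfs`` and the file names are illustrative only, not from the patch):

    # atomictemp: write a temporary file, rename it into place on close
    fp = myvfs(b'journal', 'wb', atomictemp=True)
    fp.write(b'entry\n')
    fp.close()

    # checkambig: append under repo.lock/wlock while avoiding stat ambiguity
    with myvfs(b'00changelog.i', 'ab', checkambig=True) as fp:
        fp.write(b'...')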
447 448 def symlink(self, src, dst):
448 449 self.audit(dst)
449 450 linkname = self.join(dst)
450 451 util.tryunlink(linkname)
451 452
452 453 util.makedirs(os.path.dirname(linkname), self.createmode)
453 454
454 455 if self._cansymlink:
455 456 try:
456 457 os.symlink(src, linkname)
457 458 except OSError as err:
458 459 raise OSError(err.errno, _('could not symlink to %r: %s') %
459 460 (src, encoding.strtolocal(err.strerror)),
460 461 linkname)
461 462 else:
462 463 self.write(dst, src)
463 464
464 465 def join(self, path, *insidef):
465 466 if path:
466 467 return os.path.join(self.base, path, *insidef)
467 468 else:
468 469 return self.base
469 470
470 471 opener = vfs
471 472
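A small sketch of what this patch guarantees (the store path is made up): every vfs instance now exposes an ``options`` dict by default, so consumers such as revlog can read ``opener.options`` without special-casing bare openers:

    from mercurial import vfs as vfsmod

    svfs = vfsmod.vfs(b'/path/to/repo/.hg/store')   # hypothetical path
    assert svfs.options == {}                       # present by default now
    svfs.options[b'some-option'] = True             # illustrative key only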
472 473 class proxyvfs(abstractvfs):
473 474 def __init__(self, vfs):
474 475 self.vfs = vfs
475 476
476 477 def _auditpath(self, path, mode):
477 478 return self.vfs._auditpath(path, mode)
478 479
479 480 @property
480 481 def options(self):
481 482 return self.vfs.options
482 483
483 484 @options.setter
484 485 def options(self, value):
485 486 self.vfs.options = value
486 487
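A minimal illustration of why ``options`` is a property here (identity filter and path are made up): reads and writes on a proxy are forwarded to the wrapped vfs, so the two can never drift apart:

    from mercurial import vfs as vfsmod

    inner = vfsmod.vfs(b'/some/base')               # hypothetical path
    outer = vfsmod.filtervfs(inner, lambda p: p)    # identity filter
    outer.options = {b'flag': True}                 # stored on ``inner``
    assert outer.options is inner.options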
487 488 class filtervfs(proxyvfs, abstractvfs):
488 489 '''Wrapper vfs for filtering filenames with a function.'''
489 490
490 491 def __init__(self, vfs, filter):
491 492 proxyvfs.__init__(self, vfs)
492 493 self._filter = filter
493 494
494 495 def __call__(self, path, *args, **kwargs):
495 496 return self.vfs(self._filter(path), *args, **kwargs)
496 497
497 498 def join(self, path, *insidef):
498 499 if path:
499 500 return self.vfs.join(self._filter(self.vfs.reljoin(path, *insidef)))
500 501 else:
501 502 return self.vfs.join(path)
502 503
503 504 filteropener = filtervfs
504 505
505 506 class readonlyvfs(proxyvfs):
506 507 '''Wrapper vfs preventing any writing.'''
507 508
508 509 def __init__(self, vfs):
509 510 proxyvfs.__init__(self, vfs)
510 511
511 512 def __call__(self, path, mode='r', *args, **kw):
512 513 if mode not in ('r', 'rb'):
513 514 raise error.Abort(_('this vfs is read only'))
514 515 return self.vfs(path, mode, *args, **kw)
515 516
516 517 def join(self, path, *insidef):
517 518 return self.vfs.join(path, *insidef)
518 519
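For illustration (path and file name assumed): writes through the wrapper abort, while reads fall through to the wrapped vfs:

    from mercurial import error, vfs as vfsmod

    ro = vfsmod.readonlyvfs(vfsmod.vfs(b'/some/base'))   # hypothetical path
    data = ro(b'existing-file', 'rb').read()             # reads pass through
    try:
        ro(b'existing-file', 'wb')                       # any write mode aborts
    except error.Abort:
        pass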
519 520 class closewrapbase(object):
520 521 """Base class of wrapper, which hooks closing
521 522
522 523 Do not instantiate outside of the vfs layer.
523 524 """
524 525 def __init__(self, fh):
525 526 object.__setattr__(self, r'_origfh', fh)
526 527
527 528 def __getattr__(self, attr):
528 529 return getattr(self._origfh, attr)
529 530
530 531 def __setattr__(self, attr, value):
531 532 return setattr(self._origfh, attr, value)
532 533
533 534 def __delattr__(self, attr):
534 535 return delattr(self._origfh, attr)
535 536
536 537 def __enter__(self):
537 538 self._origfh.__enter__()
538 539 return self
539 540
540 541 def __exit__(self, exc_type, exc_value, exc_tb):
541 542 raise NotImplementedError('attempted instantiating ' + str(type(self)))
542 543
543 544 def close(self):
544 545 raise NotImplementedError('attempted instantiating ' + str(type(self)))
545 546
546 547 class delayclosedfile(closewrapbase):
547 548 """Proxy for a file object whose close is delayed.
548 549
549 550 Do not instantiate outside of the vfs layer.
550 551 """
551 552 def __init__(self, fh, closer):
552 553 super(delayclosedfile, self).__init__(fh)
553 554 object.__setattr__(self, r'_closer', closer)
554 555
555 556 def __exit__(self, exc_type, exc_value, exc_tb):
556 557 self._closer.close(self._origfh)
557 558
558 559 def close(self):
559 560 self._closer.close(self._origfh)
560 561
561 562 class backgroundfilecloser(object):
562 563 """Coordinates background closing of file handles on multiple threads."""
563 564 def __init__(self, ui, expectedcount=-1):
564 565 self._running = False
565 566 self._entered = False
566 567 self._threads = []
567 568 self._threadexception = None
568 569
569 570 # Only Windows/NTFS has slow file closing. So only enable by default
570 571 # on that platform. But allow it to be enabled elsewhere for testing.
571 572 defaultenabled = pycompat.iswindows
572 573 enabled = ui.configbool('worker', 'backgroundclose', defaultenabled)
573 574
574 575 if not enabled:
575 576 return
576 577
577 578 # There is overhead to starting and stopping the background threads.
578 579 # Don't do background processing unless the file count is large enough
579 580 # to justify it.
580 581 minfilecount = ui.configint('worker', 'backgroundcloseminfilecount')
581 582 # FUTURE dynamically start background threads after minfilecount closes.
582 583 # (We don't currently have any callers that don't know their file count)
583 584 if expectedcount > 0 and expectedcount < minfilecount:
584 585 return
585 586
586 587 maxqueue = ui.configint('worker', 'backgroundclosemaxqueue')
587 588 threadcount = ui.configint('worker', 'backgroundclosethreadcount')
588 589
589 590 ui.debug('starting %d threads for background file closing\n' %
590 591 threadcount)
591 592
592 593 self._queue = pycompat.queue.Queue(maxsize=maxqueue)
593 594 self._running = True
594 595
595 596 for i in range(threadcount):
596 597 t = threading.Thread(target=self._worker, name='backgroundcloser')
597 598 self._threads.append(t)
598 599 t.start()
599 600
600 601 def __enter__(self):
601 602 self._entered = True
602 603 return self
603 604
604 605 def __exit__(self, exc_type, exc_value, exc_tb):
605 606 self._running = False
606 607
607 608 # Wait for threads to finish closing so open files don't linger for
608 609 # longer than lifetime of context manager.
609 610 for t in self._threads:
610 611 t.join()
611 612
612 613 def _worker(self):
613 614 """Main routine for worker thread."""
614 615 while True:
615 616 try:
616 617 fh = self._queue.get(block=True, timeout=0.100)
617 618 # Need to catch exceptions, or the thread will terminate and
618 619 # we could orphan file descriptors.
619 620 try:
620 621 fh.close()
621 622 except Exception as e:
622 623 # Stash so can re-raise from main thread later.
623 624 self._threadexception = e
624 625 except pycompat.queue.Empty:
625 626 if not self._running:
626 627 break
627 628
628 629 def close(self, fh):
629 630 """Schedule a file for closing."""
630 631 if not self._entered:
631 632 raise error.Abort(_('can only call close() when context manager '
632 633 'active'))
633 634
634 635 # If a background thread encountered an exception, raise now so we fail
635 636 # fast. Otherwise we may potentially go on for minutes until the error
636 637 # is acted on.
637 638 if self._threadexception:
638 639 e = self._threadexception
639 640 self._threadexception = None
640 641 raise e
641 642
642 643 # If we're not actively running, close synchronously.
643 644 if not self._running:
644 645 fh.close()
645 646 return
646 647
647 648 self._queue.put(fh, block=True, timeout=None)
648 649
649 650 class checkambigatclosing(closewrapbase):
650 651 """Proxy for a file object, to avoid ambiguity of file stat
651 652
652 653 See also util.filestat for detail about "ambiguity of file stat".
653 654
654 655 This proxy is useful only if the target file is guarded by any
655 656 lock (e.g. repo.lock or repo.wlock)
656 657
657 658 Do not instantiate outside of the vfs layer.
658 659 """
659 660 def __init__(self, fh):
660 661 super(checkambigatclosing, self).__init__(fh)
661 662 object.__setattr__(self, r'_oldstat', util.filestat.frompath(fh.name))
662 663
663 664 def _checkambig(self):
664 665 oldstat = self._oldstat
665 666 if oldstat.stat:
666 667 _avoidambig(self._origfh.name, oldstat)
667 668
668 669 def __exit__(self, exc_type, exc_value, exc_tb):
669 670 self._origfh.__exit__(exc_type, exc_value, exc_tb)
670 671 self._checkambig()
671 672
672 673 def close(self):
673 674 self._origfh.close()
674 675 self._checkambig()
@@ -1,222 +1,223 b''
1 1 revlog.parseindex must be able to parse the index file even if
2 2 an index entry is split between two 64k blocks. The ideal test
3 3 would be to create an index file with inline data where
4 4 64k < size < 64k + 64 (64k is the size of the read buffer, 64 is
5 5 the size of an index entry) and with an index entry starting right
6 6 before the 64k block boundary, and try to read it.
7 7 We approximate that by reducing the read buffer to 1 byte.
8 8
9 9 $ hg init a
10 10 $ cd a
11 11 $ echo abc > foo
12 12 $ hg add foo
13 13 $ hg commit -m 'add foo'
14 14 $ echo >> foo
15 15 $ hg commit -m 'change foo'
16 16 $ hg log -r 0:
17 17 changeset: 0:7c31755bf9b5
18 18 user: test
19 19 date: Thu Jan 01 00:00:00 1970 +0000
20 20 summary: add foo
21 21
22 22 changeset: 1:26333235a41c
23 23 tag: tip
24 24 user: test
25 25 date: Thu Jan 01 00:00:00 1970 +0000
26 26 summary: change foo
27 27
28 28 $ cat >> test.py << EOF
29 29 > from __future__ import print_function
30 30 > from mercurial import changelog, node, pycompat, vfs
31 31 >
32 32 > class singlebyteread(object):
33 33 > def __init__(self, real):
34 34 > self.real = real
35 35 >
36 36 > def read(self, size=-1):
37 37 > if size == 65536:
38 38 > size = 1
39 39 > return self.real.read(size)
40 40 >
41 41 > def __getattr__(self, key):
42 42 > return getattr(self.real, key)
43 43 >
44 44 > def __enter__(self):
45 45 > self.real.__enter__()
46 46 > return self
47 47 >
48 48 > def __exit__(self, *args, **kwargs):
49 49 > return self.real.__exit__(*args, **kwargs)
50 50 >
51 51 > def opener(*args):
52 52 > o = vfs.vfs(*args)
53 53 > def wrapper(*a, **kwargs):
54 54 > f = o(*a, **kwargs)
55 55 > return singlebyteread(f)
56 > wrapper.options = o.options
56 57 > return wrapper
57 58 >
58 59 > cl = changelog.changelog(opener(b'.hg/store'))
59 60 > print(len(cl), 'revisions:')
60 61 > for r in cl:
61 62 > print(pycompat.sysstr(node.short(cl.node(r))))
62 63 > EOF
63 64 $ "$PYTHON" test.py
64 65 2 revisions:
65 66 7c31755bf9b5
66 67 26333235a41c
67 68
68 69 $ cd ..
69 70
70 71 #if no-pure
71 72
72 73 Test SEGV caused by bad revision passed to reachableroots() (issue4775):
73 74
74 75 $ cd a
75 76
76 77 $ "$PYTHON" <<EOF
77 78 > from __future__ import print_function
78 79 > from mercurial import changelog, vfs
79 80 > cl = changelog.changelog(vfs.vfs(b'.hg/store'))
80 81 > print('good heads:')
81 82 > for head in [0, len(cl) - 1, -1]:
82 83 > print('%s: %r' % (head, cl.reachableroots(0, [head], [0])))
83 84 > print('bad heads:')
84 85 > for head in [len(cl), 10000, -2, -10000, None]:
85 86 > print('%s:' % head, end=' ')
86 87 > try:
87 88 > cl.reachableroots(0, [head], [0])
88 89 > print('uncaught buffer overflow?')
89 90 > except (IndexError, TypeError) as inst:
90 91 > print(inst)
91 92 > print('good roots:')
92 93 > for root in [0, len(cl) - 1, -1]:
93 94 > print('%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root])))
94 95 > print('out-of-range roots are ignored:')
95 96 > for root in [len(cl), 10000, -2, -10000]:
96 97 > print('%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root])))
97 98 > print('bad roots:')
98 99 > for root in [None]:
99 100 > print('%s:' % root, end=' ')
100 101 > try:
101 102 > cl.reachableroots(root, [len(cl) - 1], [root])
102 103 > print('uncaught error?')
103 104 > except TypeError as inst:
104 105 > print(inst)
105 106 > EOF
106 107 good heads:
107 108 0: [0]
108 109 1: [0]
109 110 -1: []
110 111 bad heads:
111 112 2: head out of range
112 113 10000: head out of range
113 114 -2: head out of range
114 115 -10000: head out of range
115 116 None: an integer is required( .got type NoneType.)? (re)
116 117 good roots:
117 118 0: [0]
118 119 1: [1]
119 120 -1: [-1]
120 121 out-of-range roots are ignored:
121 122 2: []
122 123 10000: []
123 124 -2: []
124 125 -10000: []
125 126 bad roots:
126 127 None: an integer is required( .got type NoneType.)? (re)
127 128
128 129 $ cd ..
129 130
130 131 Test corrupted p1/p2 fields that could cause SEGV at parsers.c:
131 132
132 133 $ mkdir invalidparent
133 134 $ cd invalidparent
134 135
135 136 $ hg clone --pull -q --config phases.publish=False ../a limit --config format.sparse-revlog=no
136 137 $ hg clone --pull -q --config phases.publish=False ../a neglimit --config format.sparse-revlog=no
137 138 $ hg clone --pull -q --config phases.publish=False ../a segv --config format.sparse-revlog=no
138 139 $ rm -R limit/.hg/cache neglimit/.hg/cache segv/.hg/cache
139 140
140 141 $ "$PYTHON" <<EOF
141 142 > data = open("limit/.hg/store/00changelog.i", "rb").read()
142 143 > poisons = [
143 144 > (b'limit', b'\0\0\0\x02'),
144 145 > (b'neglimit', b'\xff\xff\xff\xfe'),
145 146 > (b'segv', b'\0\x01\0\0'),
146 147 > ]
147 148 > for n, p in poisons:
148 149 > # corrupt p1 at rev0 and p2 at rev1
149 150 > d = data[:24] + p + data[28:127 + 28] + p + data[127 + 32:]
150 151 > open(n + b"/.hg/store/00changelog.i", "wb").write(d)
151 152 > EOF
152 153
153 154 $ hg -R limit debugrevlogindex -f1 -c
154 155 rev flag size link p1 p2 nodeid
155 156 0 0000 62 0 2 -1 7c31755bf9b5
156 157 1 0000 65 1 0 2 26333235a41c
157 158
158 159 $ hg -R limit debugdeltachain -c
159 160 rev chain# chainlen prev delta size rawsize chainsize ratio lindist extradist extraratio
160 161 0 1 1 -1 base 63 62 63 1.01613 63 0 0.00000
161 162 1 2 1 -1 base 66 65 66 1.01538 66 0 0.00000
162 163
163 164 $ hg -R neglimit debugrevlogindex -f1 -c
164 165 rev flag size link p1 p2 nodeid
165 166 0 0000 62 0 -2 -1 7c31755bf9b5
166 167 1 0000 65 1 0 -2 26333235a41c
167 168
168 169 $ hg -R segv debugrevlogindex -f1 -c
169 170 rev flag size link p1 p2 nodeid
170 171 0 0000 62 0 65536 -1 7c31755bf9b5
171 172 1 0000 65 1 0 65536 26333235a41c
172 173
173 174 $ hg -R segv debugdeltachain -c
174 175 rev chain# chainlen prev delta size rawsize chainsize ratio lindist extradist extraratio
175 176 0 1 1 -1 base 63 62 63 1.01613 63 0 0.00000
176 177 1 2 1 -1 base 66 65 66 1.01538 66 0 0.00000
177 178
178 179 $ cat <<EOF > test.py
179 180 > from __future__ import print_function
180 181 > import sys
181 182 > from mercurial import changelog, pycompat, vfs
182 183 > cl = changelog.changelog(vfs.vfs(pycompat.fsencode(sys.argv[1])))
183 184 > n0, n1 = cl.node(0), cl.node(1)
184 185 > ops = [
185 186 > ('reachableroots',
186 187 > lambda: cl.index.reachableroots2(0, [1], [0], False)),
187 188 > ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
188 189 > ('index_headrevs', lambda: cl.headrevs()),
189 190 > ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
190 191 > ('find_deepest', lambda: cl.ancestor(n0, n1)),
191 192 > ]
192 193 > for l, f in ops:
193 194 > print(l + ':', end=' ')
194 195 > try:
195 196 > f()
196 197 > print('uncaught buffer overflow?')
197 198 > except ValueError as inst:
198 199 > print(inst)
199 200 > EOF
200 201
201 202 $ "$PYTHON" test.py limit/.hg/store
202 203 reachableroots: parent out of range
203 204 compute_phases_map_sets: parent out of range
204 205 index_headrevs: parent out of range
205 206 find_gca_candidates: parent out of range
206 207 find_deepest: parent out of range
207 208 $ "$PYTHON" test.py neglimit/.hg/store
208 209 reachableroots: parent out of range
209 210 compute_phases_map_sets: parent out of range
210 211 index_headrevs: parent out of range
211 212 find_gca_candidates: parent out of range
212 213 find_deepest: parent out of range
213 214 $ "$PYTHON" test.py segv/.hg/store
214 215 reachableroots: parent out of range
215 216 compute_phases_map_sets: parent out of range
216 217 index_headrevs: parent out of range
217 218 find_gca_candidates: parent out of range
218 219 find_deepest: parent out of range
219 220
220 221 $ cd ..
221 222
222 223 #endif