shortest: move revnum-disambiguation out of revlog...
Martin von Zweigbergk
r37990:3ac950cd default
@@ -1,2614 +1,2603 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
86 86 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
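A quick sketch of the metadata envelope handled by parsemeta() and packmeta() above (illustration only, shown with native strings; not part of revlog.py):

    # Hypothetical round trip through the "\1\n" metadata envelope.
    meta = {'copy': 'a.txt', 'copyrev': '0' * 40}
    blob = packmeta(meta, 'file contents\n')
    # blob == '\x01\ncopy: a.txt\ncopyrev: 000...000\n\x01\nfile contents\n'
    parsed, offset = parsemeta(blob)
    assert parsed == meta
    assert blob[offset:] == 'file contents\n'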
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate hash. If
142 142 "write" returns True, that basically means "rawtext" returned by "write"
143 143 should be used to generate hash. Usually, "write" and "read" return
144 144 different booleans. And "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
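To make the registration contract above concrete, here is a minimal hypothetical processor; the _noop* names are invented for illustration and this sketch is not part of revlog.py:

    # Hypothetical flag processor that stores data unmodified. Per the
    # docstring above, "read" and "write" return (text, validatehash) and
    # "raw" returns only the validatehash boolean.
    def _noopread(self, rawtext):
        return rawtext, True
    def _noopwrite(self, text):
        return text, True
    def _noopraw(self, rawtext):
        return True
    # A real extension would then call something like:
    # addflagprocessor(REVIDX_EXTSTORED, (_noopread, _noopwrite, _noopraw))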
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
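A worked example of the offset/flags packing used by getoffset(), gettype() and offset_type() above (sketch):

    # The low 16 bits of an index entry's first field hold the flags; the
    # remaining bits hold the byte offset into the data file.
    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED    # 1 << 15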
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
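A small sketch of the nodeid computation above for a root revision (both parents null), using the module's hash(), nullid and the hashlib import at the top of the file:

    # For a root revision the fast path applies: the nodeid is the SHA-1 of
    # the two (sorted) parent nodeids followed by the revision text.
    expected = hashlib.sha1(nullid + nullid + b'hello\n').digest()
    assert hash(b'hello\n', nullid, nullid) == expected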
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
212 212
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
216 216 ``revs`` is sliced into groups that should be read in a single read.
217 217 Assume that revs are sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 230 chainpayload = sum(length(r) for r in revs)
231 231
232 232 if deltachainspan:
233 233 density = chainpayload / float(deltachainspan)
234 234 else:
235 235 density = 1.0
236 236
237 237 # Store the gaps in a heap to have them sorted by decreasing size
238 238 gapsheap = []
239 239 heapq.heapify(gapsheap)
240 240 prevend = None
241 241 for i, rev in enumerate(revs):
242 242 revstart = start(rev)
243 243 revlen = length(rev)
244 244
245 245 # Skip empty revisions to form larger holes
246 246 if revlen == 0:
247 247 continue
248 248
249 249 if prevend is not None:
250 250 gapsize = revstart - prevend
251 251 # only consider holes that are large enough
252 252 if gapsize > revlog._srmingapsize:
253 253 heapq.heappush(gapsheap, (-gapsize, i))
254 254
255 255 prevend = revstart + revlen
256 256
257 257 # Collect the indices of the largest holes until the density is acceptable
258 258 indicesheap = []
259 259 heapq.heapify(indicesheap)
260 260 while gapsheap and density < revlog._srdensitythreshold:
261 261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262 262
263 263 heapq.heappush(indicesheap, gapidx)
264 264
265 265 # the gap sizes are stored as negatives so the heap sorts them in
266 266 # decreasing order
267 267 readdata -= (-oppgapsize)
268 268 if readdata > 0:
269 269 density = chainpayload / float(readdata)
270 270 else:
271 271 density = 1.0
272 272
273 273 # Cut the revs at collected indices
274 274 previdx = 0
275 275 while indicesheap:
276 276 idx = heapq.heappop(indicesheap)
277 277
278 278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 279 if chunk:
280 280 yield chunk
281 281
282 282 previdx = idx
283 283
284 284 chunk = _trimchunk(revlog, revs, previdx)
285 285 if chunk:
286 286 yield chunk
287 287
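To make the density and gap logic above concrete, a toy reading with invented numbers (sketch, not part of revlog.py):

    # Three revs whose chunks cover bytes [0, 100), [100, 200) and
    # [10000, 10100): only 300 bytes of payload over a 10100-byte span.
    chainpayload = 100 + 100 + 100            # sum(length(r) for r in revs)
    deltachainspan = 10100 - 0                # end of last - start of first
    density = chainpayload / float(deltachainspan)   # ~0.03, below 0.25
    # Even so, the 9800-byte gap is smaller than the default _srmingapsize
    # (256KB), so these revs would still be read as one slice; a gap larger
    # than _srmingapsize would split the read in two.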
288 288 @attr.s(slots=True, frozen=True)
289 289 class _deltainfo(object):
290 290 distance = attr.ib()
291 291 deltalen = attr.ib()
292 292 data = attr.ib()
293 293 base = attr.ib()
294 294 chainbase = attr.ib()
295 295 chainlen = attr.ib()
296 296 compresseddeltalen = attr.ib()
297 297
298 298 class _deltacomputer(object):
299 299 def __init__(self, revlog):
300 300 self.revlog = revlog
301 301
302 302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 303 """
304 304 Provides revisions that are interesting to diff against,
305 305 grouped by level of preference.
306 306 """
307 307 revlog = self.revlog
308 308 curr = len(revlog)
309 309 prev = curr - 1
310 310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
311 311
312 312 # should we try to build a delta?
313 313 if prev != nullrev and revlog.storedeltachains:
314 314 tested = set()
315 315 # This condition is true most of the time when processing
316 316 # changegroup data into a generaldelta repo. The only time it
317 317 # isn't true is if this is the first revision in a delta chain
318 318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
319 319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
320 320 # Assume what we received from the server is a good choice
321 321 # building the delta will reuse the cache
322 322 yield (cachedelta[0],)
323 323 tested.add(cachedelta[0])
324 324
325 325 if revlog._generaldelta:
326 326 # exclude already lazy tested base if any
327 327 parents = [p for p in (p1r, p2r)
328 328 if p != nullrev and p not in tested]
329 329 if parents and not revlog._aggressivemergedeltas:
330 330 # Pick whichever parent is closer to us (to minimize the
331 331 # chance of having to build a fulltext).
332 332 parents = [max(parents)]
333 333 tested.update(parents)
334 334 yield parents
335 335
336 336 if prev not in tested:
337 337 # other approaches failed; try against prev to hopefully save us a
338 338 # fulltext.
339 339 yield (prev,)
340 340
341 341 def buildtext(self, revinfo, fh):
342 342 """Builds a fulltext version of a revision
343 343
344 344 revinfo: _revisioninfo instance that contains all needed info
345 345 fh: file handle to either the .i or the .d revlog file,
346 346 depending on whether it is inlined or not
347 347 """
348 348 btext = revinfo.btext
349 349 if btext[0] is not None:
350 350 return btext[0]
351 351
352 352 revlog = self.revlog
353 353 cachedelta = revinfo.cachedelta
354 354 flags = revinfo.flags
355 355 node = revinfo.node
356 356
357 357 baserev = cachedelta[0]
358 358 delta = cachedelta[1]
359 359 # special case deltas which replace entire base; no need to decode
360 360 # base revision. this neatly avoids censored bases, which throw when
361 361 # they're decoded.
362 362 hlen = struct.calcsize(">lll")
363 363 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
364 364 len(delta) - hlen):
365 365 btext[0] = delta[hlen:]
366 366 else:
367 367 # deltabase is the rawtext before flag processors changed it, which
368 368 # is equivalent to non-raw text
369 369 basetext = revlog.revision(baserev, _df=fh, raw=False)
370 370 btext[0] = mdiff.patch(basetext, delta)
371 371
372 372 try:
373 373 res = revlog._processflags(btext[0], flags, 'read', raw=True)
374 374 btext[0], validatehash = res
375 375 if validatehash:
376 376 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
377 377 if flags & REVIDX_ISCENSORED:
378 378 raise RevlogError(_('node %s is not censored') % node)
379 379 except CensoredNodeError:
380 380 # must pass the censored index flag to add censored revisions
381 381 if not flags & REVIDX_ISCENSORED:
382 382 raise
383 383 return btext[0]
384 384
385 385 def _builddeltadiff(self, base, revinfo, fh):
386 386 revlog = self.revlog
387 387 t = self.buildtext(revinfo, fh)
388 388 if revlog.iscensored(base):
389 389 # deltas based on a censored revision must replace the
390 390 # full content in one patch, so delta works everywhere
391 391 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
392 392 delta = header + t
393 393 else:
394 394 ptext = revlog.revision(base, _df=fh, raw=True)
395 395 delta = mdiff.textdiff(ptext, t)
396 396
397 397 return delta
398 398
399 399 def _builddeltainfo(self, revinfo, base, fh):
400 400 # can we use the cached delta?
401 401 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
402 402 delta = revinfo.cachedelta[1]
403 403 else:
404 404 delta = self._builddeltadiff(base, revinfo, fh)
405 405 revlog = self.revlog
406 406 header, data = revlog.compress(delta)
407 407 deltalen = len(header) + len(data)
408 408 chainbase = revlog.chainbase(base)
409 409 offset = revlog.end(len(revlog) - 1)
410 410 dist = deltalen + offset - revlog.start(chainbase)
411 411 if revlog._generaldelta:
412 412 deltabase = base
413 413 else:
414 414 deltabase = chainbase
415 415 chainlen, compresseddeltalen = revlog._chaininfo(base)
416 416 chainlen += 1
417 417 compresseddeltalen += deltalen
418 418 return _deltainfo(dist, deltalen, (header, data), deltabase,
419 419 chainbase, chainlen, compresseddeltalen)
420 420
421 421 def finddeltainfo(self, revinfo, fh):
422 422 """Find an acceptable delta against a candidate revision
423 423
424 424 revinfo: information about the revision (instance of _revisioninfo)
425 425 fh: file handle to either the .i or the .d revlog file,
426 426 depending on whether it is inlined or not
427 427
428 428 Returns the first acceptable candidate revision, as ordered by
429 429 _getcandidaterevs
430 430 """
431 431 cachedelta = revinfo.cachedelta
432 432 p1 = revinfo.p1
433 433 p2 = revinfo.p2
434 434 revlog = self.revlog
435 435
436 436 deltainfo = None
437 437 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
438 438 nominateddeltas = []
439 439 for candidaterev in candidaterevs:
440 440 # no delta for rawtext-changing revs (see "candelta" for why)
441 441 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
442 442 continue
443 443 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
444 444 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
445 445 nominateddeltas.append(candidatedelta)
446 446 if nominateddeltas:
447 447 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
448 448 break
449 449
450 450 return deltainfo
451 451
452 452 @attr.s(slots=True, frozen=True)
453 453 class _revisioninfo(object):
454 454 """Information about a revision that allows building its fulltext
455 455 node: expected hash of the revision
456 456 p1, p2: parent revs of the revision
457 457 btext: built text cache consisting of a one-element list
458 458 cachedelta: (baserev, uncompressed_delta) or None
459 459 flags: flags associated to the revision storage
460 460
461 461 One of btext[0] or cachedelta must be set.
462 462 """
463 463 node = attr.ib()
464 464 p1 = attr.ib()
465 465 p2 = attr.ib()
466 466 btext = attr.ib()
467 467 textlen = attr.ib()
468 468 cachedelta = attr.ib()
469 469 flags = attr.ib()
470 470
471 471 # index v0:
472 472 # 4 bytes: offset
473 473 # 4 bytes: compressed length
474 474 # 4 bytes: base rev
475 475 # 4 bytes: link rev
476 476 # 20 bytes: parent 1 nodeid
477 477 # 20 bytes: parent 2 nodeid
478 478 # 20 bytes: nodeid
479 479 indexformatv0 = struct.Struct(">4l20s20s20s")
480 480 indexformatv0_pack = indexformatv0.pack
481 481 indexformatv0_unpack = indexformatv0.unpack
482 482
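A sketch of packing and decoding a single version-0 entry with the struct defined above (values invented):

    # One v0 entry is 76 bytes: four 4-byte big-endian ints followed by three
    # 20-byte binary nodeids, in the order documented above.
    raw = indexformatv0_pack(0, 42, 0, 0, nullid, nullid, b'\xaa' * 20)
    assert len(raw) == indexformatv0.size == 76
    offset, clen, baserev, linkrev, p1, p2, node = indexformatv0_unpack(raw)
    assert (offset, clen, node) == (0, 42, b'\xaa' * 20)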
483 483 class revlogoldio(object):
484 484 def __init__(self):
485 485 self.size = indexformatv0.size
486 486
487 487 def parseindex(self, data, inline):
488 488 s = self.size
489 489 index = []
490 490 nodemap = {nullid: nullrev}
491 491 n = off = 0
492 492 l = len(data)
493 493 while off + s <= l:
494 494 cur = data[off:off + s]
495 495 off += s
496 496 e = indexformatv0_unpack(cur)
497 497 # transform to revlogv1 format
498 498 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
499 499 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
500 500 index.append(e2)
501 501 nodemap[e[6]] = n
502 502 n += 1
503 503
504 504 # add the magic null revision at -1
505 505 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
506 506
507 507 return index, nodemap, None
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 if gettype(entry[0]):
511 511 raise RevlogError(_('index entry flags need revlog version 1'))
512 512 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
513 513 node(entry[5]), node(entry[6]), entry[7])
514 514 return indexformatv0_pack(*e2)
515 515
516 516 # index ng:
517 517 # 6 bytes: offset
518 518 # 2 bytes: flags
519 519 # 4 bytes: compressed length
520 520 # 4 bytes: uncompressed length
521 521 # 4 bytes: base rev
522 522 # 4 bytes: link rev
523 523 # 4 bytes: parent 1 rev
524 524 # 4 bytes: parent 2 rev
525 525 # 32 bytes: nodeid
526 526 indexformatng = struct.Struct(">Qiiiiii20s12x")
527 527 indexformatng_pack = indexformatng.pack
528 528 versionformat = struct.Struct(">I")
529 529 versionformat_pack = versionformat.pack
530 530 versionformat_unpack = versionformat.unpack
531 531
532 532 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
533 533 # signed integer)
534 534 _maxentrysize = 0x7fffffff
535 535
536 536 class revlogio(object):
537 537 def __init__(self):
538 538 self.size = indexformatng.size
539 539
540 540 def parseindex(self, data, inline):
541 541 # call the C implementation to parse the index data
542 542 index, cache = parsers.parse_index2(data, inline)
543 543 return index, getattr(index, 'nodemap', None), cache
544 544
545 545 def packentry(self, entry, node, version, rev):
546 546 p = indexformatng_pack(*entry)
547 547 if rev == 0:
548 548 p = versionformat_pack(version) + p[4:]
549 549 return p
550 550
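A sketch of the rev-0 special case in revlogio.packentry() above: the first four bytes of the very first index entry double as the revlog version header (entry values are invented):

    # For rev 0 the data offset is always 0, so packentry() can overwrite the
    # high bytes of that field with the version/flags header.
    entry = (offset_type(0, 0), 10, 10, -1, 0, -1, -1, b'\xaa' * 20)
    packed = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert packed[:4] == versionformat_pack(REVLOG_DEFAULT_VERSION)
    assert len(packed) == indexformatng.size == 64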
551 551 class revlog(object):
552 552 """
553 553 the underlying revision storage object
554 554
555 555 A revlog consists of two parts, an index and the revision data.
556 556
557 557 The index is a file with a fixed record size containing
558 558 information on each revision, including its nodeid (hash), the
559 559 nodeids of its parents, the position and offset of its data within
560 560 the data file, and the revision it's based on. Finally, each entry
561 561 contains a linkrev entry that can serve as a pointer to external
562 562 data.
563 563
564 564 The revision data itself is a linear collection of data chunks.
565 565 Each chunk represents a revision and is usually represented as a
566 566 delta against the previous chunk. To bound lookup time, runs of
567 567 deltas are limited to about 2 times the length of the original
568 568 version data. This makes retrieval of a version proportional to
569 569 its size, or O(1) relative to the number of revisions.
570 570
571 571 Both pieces of the revlog are written to in an append-only
572 572 fashion, which means we never need to rewrite a file to insert or
573 573 remove data, and can use some simple techniques to avoid the need
574 574 for locking while reading.
575 575
576 576 If checkambig, indexfile is opened with checkambig=True at
577 577 writing, to avoid file stat ambiguity.
578 578
579 579 If mmaplargeindex is True, and an mmapindexthreshold is set, the
580 580 index will be mmapped rather than read if it is larger than the
581 581 configured threshold.
582 582
583 583 If censorable is True, the revlog can have censored revisions.
584 584 """
585 585 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
586 586 mmaplargeindex=False, censorable=False):
587 587 """
588 588 create a revlog object
589 589
590 590 opener is a function that abstracts the file opening operation
591 591 and can be used to implement COW semantics or the like.
592 592 """
593 593 self.indexfile = indexfile
594 594 self.datafile = datafile or (indexfile[:-2] + ".d")
595 595 self.opener = opener
596 596 # When True, indexfile is opened with checkambig=True at writing, to
597 597 # avoid file stat ambiguity.
598 598 self._checkambig = checkambig
599 599 self._censorable = censorable
600 600 # 3-tuple of (node, rev, text) for a raw revision.
601 601 self._cache = None
602 602 # Maps rev to chain base rev.
603 603 self._chainbasecache = util.lrucachedict(100)
604 604 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
605 605 self._chunkcache = (0, '')
606 606 # How much data to read and cache into the raw revlog data cache.
607 607 self._chunkcachesize = 65536
608 608 self._maxchainlen = None
609 609 self._aggressivemergedeltas = False
610 610 self.index = []
611 611 # Mapping of partial identifiers to full nodes.
612 612 self._pcache = {}
613 613 # Mapping of revision integer to full node.
614 614 self._nodecache = {nullid: nullrev}
615 615 self._nodepos = None
616 616 self._compengine = 'zlib'
617 617 self._maxdeltachainspan = -1
618 618 self._withsparseread = False
619 619 self._srdensitythreshold = 0.25
620 620 self._srmingapsize = 262144
621 621
622 622 mmapindexthreshold = None
623 623 v = REVLOG_DEFAULT_VERSION
624 624 opts = getattr(opener, 'options', None)
625 625 if opts is not None:
626 626 if 'revlogv2' in opts:
627 627 # version 2 revlogs always use generaldelta.
628 628 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
629 629 elif 'revlogv1' in opts:
630 630 if 'generaldelta' in opts:
631 631 v |= FLAG_GENERALDELTA
632 632 else:
633 633 v = 0
634 634 if 'chunkcachesize' in opts:
635 635 self._chunkcachesize = opts['chunkcachesize']
636 636 if 'maxchainlen' in opts:
637 637 self._maxchainlen = opts['maxchainlen']
638 638 if 'aggressivemergedeltas' in opts:
639 639 self._aggressivemergedeltas = opts['aggressivemergedeltas']
640 640 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
641 641 if 'compengine' in opts:
642 642 self._compengine = opts['compengine']
643 643 if 'maxdeltachainspan' in opts:
644 644 self._maxdeltachainspan = opts['maxdeltachainspan']
645 645 if mmaplargeindex and 'mmapindexthreshold' in opts:
646 646 mmapindexthreshold = opts['mmapindexthreshold']
647 647 self._withsparseread = bool(opts.get('with-sparse-read', False))
648 648 if 'sparse-read-density-threshold' in opts:
649 649 self._srdensitythreshold = opts['sparse-read-density-threshold']
650 650 if 'sparse-read-min-gap-size' in opts:
651 651 self._srmingapsize = opts['sparse-read-min-gap-size']
652 652
653 653 if self._chunkcachesize <= 0:
654 654 raise RevlogError(_('revlog chunk cache size %r is not greater '
655 655 'than 0') % self._chunkcachesize)
656 656 elif self._chunkcachesize & (self._chunkcachesize - 1):
657 657 raise RevlogError(_('revlog chunk cache size %r is not a power '
658 658 'of 2') % self._chunkcachesize)
659 659
660 660 indexdata = ''
661 661 self._initempty = True
662 662 try:
663 663 with self._indexfp() as f:
664 664 if (mmapindexthreshold is not None and
665 665 self.opener.fstat(f).st_size >= mmapindexthreshold):
666 666 indexdata = util.buffer(util.mmapread(f))
667 667 else:
668 668 indexdata = f.read()
669 669 if len(indexdata) > 0:
670 670 v = versionformat_unpack(indexdata[:4])[0]
671 671 self._initempty = False
672 672 except IOError as inst:
673 673 if inst.errno != errno.ENOENT:
674 674 raise
675 675
676 676 self.version = v
677 677 self._inline = v & FLAG_INLINE_DATA
678 678 self._generaldelta = v & FLAG_GENERALDELTA
679 679 flags = v & ~0xFFFF
680 680 fmt = v & 0xFFFF
681 681 if fmt == REVLOGV0:
682 682 if flags:
683 683 raise RevlogError(_('unknown flags (%#04x) in version %d '
684 684 'revlog %s') %
685 685 (flags >> 16, fmt, self.indexfile))
686 686 elif fmt == REVLOGV1:
687 687 if flags & ~REVLOGV1_FLAGS:
688 688 raise RevlogError(_('unknown flags (%#04x) in version %d '
689 689 'revlog %s') %
690 690 (flags >> 16, fmt, self.indexfile))
691 691 elif fmt == REVLOGV2:
692 692 if flags & ~REVLOGV2_FLAGS:
693 693 raise RevlogError(_('unknown flags (%#04x) in version %d '
694 694 'revlog %s') %
695 695 (flags >> 16, fmt, self.indexfile))
696 696 else:
697 697 raise RevlogError(_('unknown version (%d) in revlog %s') %
698 698 (fmt, self.indexfile))
699 699
700 700 self.storedeltachains = True
701 701
702 702 self._io = revlogio()
703 703 if self.version == REVLOGV0:
704 704 self._io = revlogoldio()
705 705 try:
706 706 d = self._io.parseindex(indexdata, self._inline)
707 707 except (ValueError, IndexError):
708 708 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
709 709 self.index, nodemap, self._chunkcache = d
710 710 if nodemap is not None:
711 711 self.nodemap = self._nodecache = nodemap
712 712 if not self._chunkcache:
713 713 self._chunkclear()
714 714 # revnum -> (chain-length, sum-delta-length)
715 715 self._chaininfocache = {}
716 716 # revlog header -> revlog compressor
717 717 self._decompressors = {}
718 718
719 719 @util.propertycache
720 720 def _compressor(self):
721 721 return util.compengines[self._compengine].revlogcompressor()
722 722
723 723 def _indexfp(self, mode='r'):
724 724 """file object for the revlog's index file"""
725 725 args = {r'mode': mode}
726 726 if mode != 'r':
727 727 args[r'checkambig'] = self._checkambig
728 728 if mode == 'w':
729 729 args[r'atomictemp'] = True
730 730 return self.opener(self.indexfile, **args)
731 731
732 732 def _datafp(self, mode='r'):
733 733 """file object for the revlog's data file"""
734 734 return self.opener(self.datafile, mode=mode)
735 735
736 736 @contextlib.contextmanager
737 737 def _datareadfp(self, existingfp=None):
738 738 """file object suitable to read data"""
739 739 if existingfp is not None:
740 740 yield existingfp
741 741 else:
742 742 if self._inline:
743 743 func = self._indexfp
744 744 else:
745 745 func = self._datafp
746 746 with func() as fp:
747 747 yield fp
748 748
749 749 def tip(self):
750 750 return self.node(len(self.index) - 2)
751 751 def __contains__(self, rev):
752 752 return 0 <= rev < len(self)
753 753 def __len__(self):
754 754 return len(self.index) - 1
755 755 def __iter__(self):
756 756 return iter(xrange(len(self)))
757 757 def revs(self, start=0, stop=None):
758 758 """iterate over all rev in this revlog (from start to stop)"""
759 759 step = 1
760 760 if stop is not None:
761 761 if start > stop:
762 762 step = -1
763 763 stop += step
764 764 else:
765 765 stop = len(self)
766 766 return xrange(start, stop, step)
767 767
768 768 @util.propertycache
769 769 def nodemap(self):
770 770 self.rev(self.node(0))
771 771 return self._nodecache
772 772
773 773 def hasnode(self, node):
774 774 try:
775 775 self.rev(node)
776 776 return True
777 777 except KeyError:
778 778 return False
779 779
780 780 def candelta(self, baserev, rev):
781 781 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 782 # Disable delta if either rev requires a content-changing flag
783 783 # processor (ex. LFS). This is because such flag processor can alter
784 784 # the rawtext content that the delta will be based on, and two clients
785 785 # could have a same revlog node with different flags (i.e. different
786 786 # rawtext contents) and the delta could be incompatible.
787 787 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
788 788 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
789 789 return False
790 790 return True
791 791
792 792 def clearcaches(self):
793 793 self._cache = None
794 794 self._chainbasecache.clear()
795 795 self._chunkcache = (0, '')
796 796 self._pcache = {}
797 797
798 798 try:
799 799 self._nodecache.clearcaches()
800 800 except AttributeError:
801 801 self._nodecache = {nullid: nullrev}
802 802 self._nodepos = None
803 803
804 804 def rev(self, node):
805 805 try:
806 806 return self._nodecache[node]
807 807 except TypeError:
808 808 raise
809 809 except RevlogError:
810 810 # parsers.c radix tree lookup failed
811 811 if node == wdirid or node in wdirfilenodeids:
812 812 raise error.WdirUnsupported
813 813 raise LookupError(node, self.indexfile, _('no node'))
814 814 except KeyError:
815 815 # pure python cache lookup failed
816 816 n = self._nodecache
817 817 i = self.index
818 818 p = self._nodepos
819 819 if p is None:
820 820 p = len(i) - 2
821 821 else:
822 822 assert p < len(i)
823 823 for r in xrange(p, -1, -1):
824 824 v = i[r][7]
825 825 n[v] = r
826 826 if v == node:
827 827 self._nodepos = r - 1
828 828 return r
829 829 if node == wdirid or node in wdirfilenodeids:
830 830 raise error.WdirUnsupported
831 831 raise LookupError(node, self.indexfile, _('no node'))
832 832
833 833 # Accessors for index entries.
834 834
835 835 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
836 836 # are flags.
837 837 def start(self, rev):
838 838 return int(self.index[rev][0] >> 16)
839 839
840 840 def flags(self, rev):
841 841 return self.index[rev][0] & 0xFFFF
842 842
843 843 def length(self, rev):
844 844 return self.index[rev][1]
845 845
846 846 def rawsize(self, rev):
847 847 """return the length of the uncompressed text for a given revision"""
848 848 l = self.index[rev][2]
849 849 if l >= 0:
850 850 return l
851 851
852 852 t = self.revision(rev, raw=True)
853 853 return len(t)
854 854
855 855 def size(self, rev):
856 856 """length of non-raw text (processed by a "read" flag processor)"""
857 857 # fast path: if no "read" flag processor could change the content,
858 858 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 859 flags = self.flags(rev)
860 860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 861 return self.rawsize(rev)
862 862
863 863 return len(self.revision(rev, raw=False))
864 864
865 865 def chainbase(self, rev):
866 866 base = self._chainbasecache.get(rev)
867 867 if base is not None:
868 868 return base
869 869
870 870 index = self.index
871 871 base = index[rev][3]
872 872 while base != rev:
873 873 rev = base
874 874 base = index[rev][3]
875 875
876 876 self._chainbasecache[rev] = base
877 877 return base
878 878
879 879 def linkrev(self, rev):
880 880 return self.index[rev][4]
881 881
882 882 def parentrevs(self, rev):
883 883 try:
884 884 entry = self.index[rev]
885 885 except IndexError:
886 886 if rev == wdirrev:
887 887 raise error.WdirUnsupported
888 888 raise
889 889
890 890 return entry[5], entry[6]
891 891
892 892 def node(self, rev):
893 893 try:
894 894 return self.index[rev][7]
895 895 except IndexError:
896 896 if rev == wdirrev:
897 897 raise error.WdirUnsupported
898 898 raise
899 899
900 900 # Derived from index values.
901 901
902 902 def end(self, rev):
903 903 return self.start(rev) + self.length(rev)
904 904
905 905 def parents(self, node):
906 906 i = self.index
907 907 d = i[self.rev(node)]
908 908 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
909 909
910 910 def chainlen(self, rev):
911 911 return self._chaininfo(rev)[0]
912 912
913 913 def _chaininfo(self, rev):
914 914 chaininfocache = self._chaininfocache
915 915 if rev in chaininfocache:
916 916 return chaininfocache[rev]
917 917 index = self.index
918 918 generaldelta = self._generaldelta
919 919 iterrev = rev
920 920 e = index[iterrev]
921 921 clen = 0
922 922 compresseddeltalen = 0
923 923 while iterrev != e[3]:
924 924 clen += 1
925 925 compresseddeltalen += e[1]
926 926 if generaldelta:
927 927 iterrev = e[3]
928 928 else:
929 929 iterrev -= 1
930 930 if iterrev in chaininfocache:
931 931 t = chaininfocache[iterrev]
932 932 clen += t[0]
933 933 compresseddeltalen += t[1]
934 934 break
935 935 e = index[iterrev]
936 936 else:
937 937 # Add text length of base since decompressing that also takes
938 938 # work. For cache hits the length is already included.
939 939 compresseddeltalen += e[1]
940 940 r = (clen, compresseddeltalen)
941 941 chaininfocache[rev] = r
942 942 return r
943 943
944 944 def _deltachain(self, rev, stoprev=None):
945 945 """Obtain the delta chain for a revision.
946 946
947 947 ``stoprev`` specifies a revision to stop at. If not specified, we
948 948 stop at the base of the chain.
949 949
950 950 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
951 951 revs in ascending order and ``stopped`` is a bool indicating whether
952 952 ``stoprev`` was hit.
953 953 """
954 954 # Try C implementation.
955 955 try:
956 956 return self.index.deltachain(rev, stoprev, self._generaldelta)
957 957 except AttributeError:
958 958 pass
959 959
960 960 chain = []
961 961
962 962 # Alias to prevent attribute lookup in tight loop.
963 963 index = self.index
964 964 generaldelta = self._generaldelta
965 965
966 966 iterrev = rev
967 967 e = index[iterrev]
968 968 while iterrev != e[3] and iterrev != stoprev:
969 969 chain.append(iterrev)
970 970 if generaldelta:
971 971 iterrev = e[3]
972 972 else:
973 973 iterrev -= 1
974 974 e = index[iterrev]
975 975
976 976 if iterrev == stoprev:
977 977 stopped = True
978 978 else:
979 979 chain.append(iterrev)
980 980 stopped = False
981 981
982 982 chain.reverse()
983 983 return chain, stopped
984 984
985 985 def ancestors(self, revs, stoprev=0, inclusive=False):
986 986 """Generate the ancestors of 'revs' in reverse topological order.
987 987 Does not generate revs lower than stoprev.
988 988
989 989 See the documentation for ancestor.lazyancestors for more details."""
990 990
991 991 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
992 992 inclusive=inclusive)
993 993
994 994 def descendants(self, revs):
995 995 """Generate the descendants of 'revs' in revision order.
996 996
997 997 Yield a sequence of revision numbers starting with a child of
998 998 some rev in revs, i.e., each revision is *not* considered a
999 999 descendant of itself. Results are ordered by revision number (a
1000 1000 topological sort)."""
1001 1001 first = min(revs)
1002 1002 if first == nullrev:
1003 1003 for i in self:
1004 1004 yield i
1005 1005 return
1006 1006
1007 1007 seen = set(revs)
1008 1008 for i in self.revs(start=first + 1):
1009 1009 for x in self.parentrevs(i):
1010 1010 if x != nullrev and x in seen:
1011 1011 seen.add(i)
1012 1012 yield i
1013 1013 break
1014 1014
1015 1015 def findcommonmissing(self, common=None, heads=None):
1016 1016 """Return a tuple of the ancestors of common and the ancestors of heads
1017 1017 that are not ancestors of common. In revset terminology, we return the
1018 1018 tuple:
1019 1019
1020 1020 ::common, (::heads) - (::common)
1021 1021
1022 1022 The list is sorted by revision number, meaning it is
1023 1023 topologically sorted.
1024 1024
1025 1025 'heads' and 'common' are both lists of node IDs. If heads is
1026 1026 not supplied, uses all of the revlog's heads. If common is not
1027 1027 supplied, uses nullid."""
1028 1028 if common is None:
1029 1029 common = [nullid]
1030 1030 if heads is None:
1031 1031 heads = self.heads()
1032 1032
1033 1033 common = [self.rev(n) for n in common]
1034 1034 heads = [self.rev(n) for n in heads]
1035 1035
1036 1036 # we want the ancestors, but inclusive
1037 1037 class lazyset(object):
1038 1038 def __init__(self, lazyvalues):
1039 1039 self.addedvalues = set()
1040 1040 self.lazyvalues = lazyvalues
1041 1041
1042 1042 def __contains__(self, value):
1043 1043 return value in self.addedvalues or value in self.lazyvalues
1044 1044
1045 1045 def __iter__(self):
1046 1046 added = self.addedvalues
1047 1047 for r in added:
1048 1048 yield r
1049 1049 for r in self.lazyvalues:
1050 1050 if not r in added:
1051 1051 yield r
1052 1052
1053 1053 def add(self, value):
1054 1054 self.addedvalues.add(value)
1055 1055
1056 1056 def update(self, values):
1057 1057 self.addedvalues.update(values)
1058 1058
1059 1059 has = lazyset(self.ancestors(common))
1060 1060 has.add(nullrev)
1061 1061 has.update(common)
1062 1062
1063 1063 # take all ancestors from heads that aren't in has
1064 1064 missing = set()
1065 1065 visit = collections.deque(r for r in heads if r not in has)
1066 1066 while visit:
1067 1067 r = visit.popleft()
1068 1068 if r in missing:
1069 1069 continue
1070 1070 else:
1071 1071 missing.add(r)
1072 1072 for p in self.parentrevs(r):
1073 1073 if p not in has:
1074 1074 visit.append(p)
1075 1075 missing = list(missing)
1076 1076 missing.sort()
1077 1077 return has, [self.node(miss) for miss in missing]
1078 1078
1079 1079 def incrementalmissingrevs(self, common=None):
1080 1080 """Return an object that can be used to incrementally compute the
1081 1081 revision numbers of the ancestors of arbitrary sets that are not
1082 1082 ancestors of common. This is an ancestor.incrementalmissingancestors
1083 1083 object.
1084 1084
1085 1085 'common' is a list of revision numbers. If common is not supplied, uses
1086 1086 nullrev.
1087 1087 """
1088 1088 if common is None:
1089 1089 common = [nullrev]
1090 1090
1091 1091 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1092 1092
1093 1093 def findmissingrevs(self, common=None, heads=None):
1094 1094 """Return the revision numbers of the ancestors of heads that
1095 1095 are not ancestors of common.
1096 1096
1097 1097 More specifically, return a list of revision numbers corresponding to
1098 1098 nodes N such that every N satisfies the following constraints:
1099 1099
1100 1100 1. N is an ancestor of some node in 'heads'
1101 1101 2. N is not an ancestor of any node in 'common'
1102 1102
1103 1103 The list is sorted by revision number, meaning it is
1104 1104 topologically sorted.
1105 1105
1106 1106 'heads' and 'common' are both lists of revision numbers. If heads is
1107 1107 not supplied, uses all of the revlog's heads. If common is not
1108 1108 supplied, uses nullid."""
1109 1109 if common is None:
1110 1110 common = [nullrev]
1111 1111 if heads is None:
1112 1112 heads = self.headrevs()
1113 1113
1114 1114 inc = self.incrementalmissingrevs(common=common)
1115 1115 return inc.missingancestors(heads)
1116 1116
1117 1117 def findmissing(self, common=None, heads=None):
1118 1118 """Return the ancestors of heads that are not ancestors of common.
1119 1119
1120 1120 More specifically, return a list of nodes N such that every N
1121 1121 satisfies the following constraints:
1122 1122
1123 1123 1. N is an ancestor of some node in 'heads'
1124 1124 2. N is not an ancestor of any node in 'common'
1125 1125
1126 1126 The list is sorted by revision number, meaning it is
1127 1127 topologically sorted.
1128 1128
1129 1129 'heads' and 'common' are both lists of node IDs. If heads is
1130 1130 not supplied, uses all of the revlog's heads. If common is not
1131 1131 supplied, uses nullid."""
1132 1132 if common is None:
1133 1133 common = [nullid]
1134 1134 if heads is None:
1135 1135 heads = self.heads()
1136 1136
1137 1137 common = [self.rev(n) for n in common]
1138 1138 heads = [self.rev(n) for n in heads]
1139 1139
1140 1140 inc = self.incrementalmissingrevs(common=common)
1141 1141 return [self.node(r) for r in inc.missingancestors(heads)]
1142 1142
1143 1143 def nodesbetween(self, roots=None, heads=None):
1144 1144 """Return a topological path from 'roots' to 'heads'.
1145 1145
1146 1146 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1147 1147 topologically sorted list of all nodes N that satisfy both of
1148 1148 these constraints:
1149 1149
1150 1150 1. N is a descendant of some node in 'roots'
1151 1151 2. N is an ancestor of some node in 'heads'
1152 1152
1153 1153 Every node is considered to be both a descendant and an ancestor
1154 1154 of itself, so every reachable node in 'roots' and 'heads' will be
1155 1155 included in 'nodes'.
1156 1156
1157 1157 'outroots' is the list of reachable nodes in 'roots', i.e., the
1158 1158 subset of 'roots' that is returned in 'nodes'. Likewise,
1159 1159 'outheads' is the subset of 'heads' that is also in 'nodes'.
1160 1160
1161 1161 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1162 1162 unspecified, uses nullid as the only root. If 'heads' is
1163 1163 unspecified, uses list of all of the revlog's heads."""
1164 1164 nonodes = ([], [], [])
1165 1165 if roots is not None:
1166 1166 roots = list(roots)
1167 1167 if not roots:
1168 1168 return nonodes
1169 1169 lowestrev = min([self.rev(n) for n in roots])
1170 1170 else:
1171 1171 roots = [nullid] # Everybody's a descendant of nullid
1172 1172 lowestrev = nullrev
1173 1173 if (lowestrev == nullrev) and (heads is None):
1174 1174 # We want _all_ the nodes!
1175 1175 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1176 1176 if heads is None:
1177 1177 # All nodes are ancestors, so the latest ancestor is the last
1178 1178 # node.
1179 1179 highestrev = len(self) - 1
1180 1180 # Set ancestors to None to signal that every node is an ancestor.
1181 1181 ancestors = None
1182 1182 # Set heads to an empty dictionary for later discovery of heads
1183 1183 heads = {}
1184 1184 else:
1185 1185 heads = list(heads)
1186 1186 if not heads:
1187 1187 return nonodes
1188 1188 ancestors = set()
1189 1189 # Turn heads into a dictionary so we can remove 'fake' heads.
1190 1190 # Also, later we will be using it to filter out the heads we can't
1191 1191 # find from roots.
1192 1192 heads = dict.fromkeys(heads, False)
1193 1193 # Start at the top and keep marking parents until we're done.
1194 1194 nodestotag = set(heads)
1195 1195 # Remember where the top was so we can use it as a limit later.
1196 1196 highestrev = max([self.rev(n) for n in nodestotag])
1197 1197 while nodestotag:
1198 1198 # grab a node to tag
1199 1199 n = nodestotag.pop()
1200 1200 # Never tag nullid
1201 1201 if n == nullid:
1202 1202 continue
1203 1203 # A node's revision number represents its place in a
1204 1204 # topologically sorted list of nodes.
1205 1205 r = self.rev(n)
1206 1206 if r >= lowestrev:
1207 1207 if n not in ancestors:
1208 1208 # If we are possibly a descendant of one of the roots
1209 1209 # and we haven't already been marked as an ancestor
1210 1210 ancestors.add(n) # Mark as ancestor
1211 1211 # Add non-nullid parents to list of nodes to tag.
1212 1212 nodestotag.update([p for p in self.parents(n) if
1213 1213 p != nullid])
1214 1214 elif n in heads: # We've seen it before, is it a fake head?
1215 1215 # So it is, real heads should not be the ancestors of
1216 1216 # any other heads.
1217 1217 heads.pop(n)
1218 1218 if not ancestors:
1219 1219 return nonodes
1220 1220 # Now that we have our set of ancestors, we want to remove any
1221 1221 # roots that are not ancestors.
1222 1222
1223 1223 # If one of the roots was nullid, everything is included anyway.
1224 1224 if lowestrev > nullrev:
1225 1225 # But, since we weren't, let's recompute the lowest rev to not
1226 1226 # include roots that aren't ancestors.
1227 1227
1228 1228 # Filter out roots that aren't ancestors of heads
1229 1229 roots = [root for root in roots if root in ancestors]
1230 1230 # Recompute the lowest revision
1231 1231 if roots:
1232 1232 lowestrev = min([self.rev(root) for root in roots])
1233 1233 else:
1234 1234 # No more roots? Return empty list
1235 1235 return nonodes
1236 1236 else:
1237 1237 # We are descending from nullid, and don't need to care about
1238 1238 # any other roots.
1239 1239 lowestrev = nullrev
1240 1240 roots = [nullid]
1241 1241 # Transform our roots list into a set.
1242 1242 descendants = set(roots)
1243 1243 # Also, keep the original roots so we can filter out roots that aren't
1244 1244 # 'real' roots (i.e. are descended from other roots).
1245 1245 roots = descendants.copy()
1246 1246 # Our topologically sorted list of output nodes.
1247 1247 orderedout = []
1248 1248 # Don't start at nullid since we don't want nullid in our output list,
1249 1249 # and if nullid shows up in descendants, empty parents will look like
1250 1250 # they're descendants.
1251 1251 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1252 1252 n = self.node(r)
1253 1253 isdescendant = False
1254 1254 if lowestrev == nullrev: # Everybody is a descendant of nullid
1255 1255 isdescendant = True
1256 1256 elif n in descendants:
1257 1257 # n is already a descendant
1258 1258 isdescendant = True
1259 1259 # This check only needs to be done here because all the roots
1260 1260 # will start being marked as descendants before the loop.
1261 1261 if n in roots:
1262 1262 # If n was a root, check if it's a 'real' root.
1263 1263 p = tuple(self.parents(n))
1264 1264 # If any of its parents are descendants, it's not a root.
1265 1265 if (p[0] in descendants) or (p[1] in descendants):
1266 1266 roots.remove(n)
1267 1267 else:
1268 1268 p = tuple(self.parents(n))
1269 1269 # A node is a descendant if either of its parents are
1270 1270 # descendants. (We seeded the descendants set with the roots
1271 1271 # up there, remember?)
1272 1272 if (p[0] in descendants) or (p[1] in descendants):
1273 1273 descendants.add(n)
1274 1274 isdescendant = True
1275 1275 if isdescendant and ((ancestors is None) or (n in ancestors)):
1276 1276 # Only include nodes that are both descendants and ancestors.
1277 1277 orderedout.append(n)
1278 1278 if (ancestors is not None) and (n in heads):
1279 1279 # We're trying to figure out which heads are reachable
1280 1280 # from roots.
1281 1281 # Mark this head as having been reached
1282 1282 heads[n] = True
1283 1283 elif ancestors is None:
1284 1284 # Otherwise, we're trying to discover the heads.
1285 1285 # Assume this is a head because if it isn't, the next step
1286 1286 # will eventually remove it.
1287 1287 heads[n] = True
1288 1288 # But, obviously its parents aren't.
1289 1289 for p in self.parents(n):
1290 1290 heads.pop(p, None)
1291 1291 heads = [head for head, flag in heads.iteritems() if flag]
1292 1292 roots = list(roots)
1293 1293 assert orderedout
1294 1294 assert roots
1295 1295 assert heads
1296 1296 return (orderedout, roots, heads)
1297 1297
1298 1298 def headrevs(self):
1299 1299 try:
1300 1300 return self.index.headrevs()
1301 1301 except AttributeError:
1302 1302 return self._headrevs()
1303 1303
1304 1304 def computephases(self, roots):
1305 1305 return self.index.computephasesmapsets(roots)
1306 1306
1307 1307 def _headrevs(self):
1308 1308 count = len(self)
1309 1309 if not count:
1310 1310 return [nullrev]
1311 1311 # we won't iterate over filtered revs, so nobody is a head at start
1312 1312 ishead = [0] * (count + 1)
1313 1313 index = self.index
1314 1314 for r in self:
1315 1315 ishead[r] = 1 # I may be a head
1316 1316 e = index[r]
1317 1317 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1318 1318 return [r for r, val in enumerate(ishead) if val]
1319 1319
1320 1320 def heads(self, start=None, stop=None):
1321 1321 """return the list of all nodes that have no children
1322 1322
1323 1323 if start is specified, only heads that are descendants of
1324 1324 start will be returned
1325 1325 if stop is specified, it will consider all the revs from stop
1326 1326 as if they had no children
1327 1327 """
1328 1328 if start is None and stop is None:
1329 1329 if not len(self):
1330 1330 return [nullid]
1331 1331 return [self.node(r) for r in self.headrevs()]
1332 1332
1333 1333 if start is None:
1334 1334 start = nullid
1335 1335 if stop is None:
1336 1336 stop = []
1337 1337 stoprevs = set([self.rev(n) for n in stop])
1338 1338 startrev = self.rev(start)
1339 1339 reachable = {startrev}
1340 1340 heads = {startrev}
1341 1341
1342 1342 parentrevs = self.parentrevs
1343 1343 for r in self.revs(start=startrev + 1):
1344 1344 for p in parentrevs(r):
1345 1345 if p in reachable:
1346 1346 if r not in stoprevs:
1347 1347 reachable.add(r)
1348 1348 heads.add(r)
1349 1349 if p in heads and p not in stoprevs:
1350 1350 heads.remove(p)
1351 1351
1352 1352 return [self.node(r) for r in heads]
1353 1353
1354 1354 def children(self, node):
1355 1355 """find the children of a given node"""
1356 1356 c = []
1357 1357 p = self.rev(node)
1358 1358 for r in self.revs(start=p + 1):
1359 1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1360 1360 if prevs:
1361 1361 for pr in prevs:
1362 1362 if pr == p:
1363 1363 c.append(self.node(r))
1364 1364 elif p == nullrev:
1365 1365 c.append(self.node(r))
1366 1366 return c
1367 1367
1368 1368 def descendant(self, start, end):
1369 1369 if start == nullrev:
1370 1370 return True
1371 1371 for i in self.descendants([start]):
1372 1372 if i == end:
1373 1373 return True
1374 1374 elif i > end:
1375 1375 break
1376 1376 return False
1377 1377
1378 1378 def commonancestorsheads(self, a, b):
1379 1379 """calculate all the heads of the common ancestors of nodes a and b"""
1380 1380 a, b = self.rev(a), self.rev(b)
1381 1381 try:
1382 1382 ancs = self.index.commonancestorsheads(a, b)
1383 1383 except (AttributeError, OverflowError): # C implementation failed
1384 1384 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1385 1385 return pycompat.maplist(self.node, ancs)
1386 1386
1387 1387 def isancestor(self, a, b):
1388 1388 """return True if node a is an ancestor of node b
1389 1389
1390 1390 The implementation of this is trivial but the use of
1391 1391 commonancestorsheads is not."""
1392 1392 return a in self.commonancestorsheads(a, b)
1393 1393
1394 1394 def ancestor(self, a, b):
1395 1395 """calculate the "best" common ancestor of nodes a and b"""
1396 1396
1397 1397 a, b = self.rev(a), self.rev(b)
1398 1398 try:
1399 1399 ancs = self.index.ancestors(a, b)
1400 1400 except (AttributeError, OverflowError):
1401 1401 ancs = ancestor.ancestors(self.parentrevs, a, b)
1402 1402 if ancs:
1403 1403 # choose a consistent winner when there's a tie
1404 1404 return min(map(self.node, ancs))
1405 1405 return nullid
1406 1406
1407 1407 def _match(self, id):
1408 1408 if isinstance(id, int):
1409 1409 # rev
1410 1410 return self.node(id)
1411 1411 if len(id) == 20:
1412 1412 # possibly a binary node
1413 1413 # odds of a binary node being all hex in ASCII are 1 in 10**25
1414 1414 try:
1415 1415 node = id
1416 1416 self.rev(node) # quick search the index
1417 1417 return node
1418 1418 except LookupError:
1419 1419 pass # may be partial hex id
1420 1420 try:
1421 1421 # str(rev)
1422 1422 rev = int(id)
1423 1423 if "%d" % rev != id:
1424 1424 raise ValueError
1425 1425 if rev < 0:
1426 1426 rev = len(self) + rev
1427 1427 if rev < 0 or rev >= len(self):
1428 1428 raise ValueError
1429 1429 return self.node(rev)
1430 1430 except (ValueError, OverflowError):
1431 1431 pass
1432 1432 if len(id) == 40:
1433 1433 try:
1434 1434 # a full hex nodeid?
1435 1435 node = bin(id)
1436 1436 self.rev(node)
1437 1437 return node
1438 1438 except (TypeError, LookupError):
1439 1439 pass
1440 1440
1441 1441 def _partialmatch(self, id):
1442 1442 # we don't care about wdirfilenodeids as they should always be full hashes
1443 1443 maybewdir = wdirhex.startswith(id)
1444 1444 try:
1445 1445 partial = self.index.partialmatch(id)
1446 1446 if partial and self.hasnode(partial):
1447 1447 if maybewdir:
1448 1448 # single 'ff...' match in radix tree, ambiguous with wdir
1449 1449 raise RevlogError
1450 1450 return partial
1451 1451 if maybewdir:
1452 1452 # no 'ff...' match in radix tree, wdir identified
1453 1453 raise error.WdirUnsupported
1454 1454 return None
1455 1455 except RevlogError:
1456 1456 # parsers.c radix tree lookup gave multiple matches
1457 1457 # fast path: for unfiltered changelog, radix tree is accurate
1458 1458 if not getattr(self, 'filteredrevs', None):
1459 1459 raise LookupError(id, self.indexfile,
1460 1460 _('ambiguous identifier'))
1461 1461 # fall through to slow path that filters hidden revisions
1462 1462 except (AttributeError, ValueError):
1463 1463 # we are pure python, or key was too short to search radix tree
1464 1464 pass
1465 1465
1466 1466 if id in self._pcache:
1467 1467 return self._pcache[id]
1468 1468
1469 1469 if len(id) <= 40:
1470 1470 try:
1471 1471 # hex(node)[:...]
1472 1472 l = len(id) // 2 # grab an even number of digits
1473 1473 prefix = bin(id[:l * 2])
1474 1474 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1475 1475 nl = [n for n in nl if hex(n).startswith(id) and
1476 1476 self.hasnode(n)]
1477 1477 if len(nl) > 0:
1478 1478 if len(nl) == 1 and not maybewdir:
1479 1479 self._pcache[id] = nl[0]
1480 1480 return nl[0]
1481 1481 raise LookupError(id, self.indexfile,
1482 1482 _('ambiguous identifier'))
1483 1483 if maybewdir:
1484 1484 raise error.WdirUnsupported
1485 1485 return None
1486 1486 except TypeError:
1487 1487 pass
1488 1488
1489 1489 def lookup(self, id):
1490 1490 """locate a node based on:
1491 1491 - revision number or str(revision number)
1492 1492 - nodeid or subset of hex nodeid
1493 1493 """
1494 1494 n = self._match(id)
1495 1495 if n is not None:
1496 1496 return n
1497 1497 n = self._partialmatch(id)
1498 1498 if n:
1499 1499 return n
1500 1500
1501 1501 raise LookupError(id, self.indexfile, _('no match found'))
1502 1502
1503 1503 def shortest(self, node, minlength=1):
1504 1504 """Find the shortest unambiguous prefix that matches node."""
1505 def isrev(prefix):
1506 try:
1507 i = int(prefix)
1508 # if we are a pure int, then starting with zero will not be
1509 # confused as a rev; or, obviously, if the int is larger
1510 # than the value of the tip rev
1511 if prefix[0] == '0' or i > len(self):
1512 return False
1513 return True
1514 except ValueError:
1515 return False
1516
1517 1505 def isvalid(prefix):
1518 1506 try:
1519 1507 node = self._partialmatch(prefix)
1520 1508 except error.RevlogError:
1521 1509 return False
1522 1510 except error.WdirUnsupported:
1523 1511 # single 'ff...' match
1524 1512 return True
1525 1513 if node is None:
1526 1514 raise LookupError(node, self.indexfile, _('no node'))
1527 1515 return True
1528 1516
1529 1517 def maybewdir(prefix):
1530 1518 return all(c == 'f' for c in prefix)
1531 1519
1532 1520 hexnode = hex(node)
1533 1521
1534 1522 def disambiguate(hexnode, minlength):
1523 """Disambiguate against wdirid."""
1535 1524 for length in range(minlength, 41):
1536 1525 prefix = hexnode[:length]
1537 if not isrev(prefix) and not maybewdir(prefix):
1526 if not maybewdir(prefix):
1538 1527 return prefix
1539 1528
1540 1529 if not getattr(self, 'filteredrevs', None):
1541 1530 try:
1542 1531 length = max(self.index.shortest(node), minlength)
1543 1532 return disambiguate(hexnode, length)
1544 1533 except RevlogError:
1545 1534 if node != wdirid:
1546 1535 raise LookupError(node, self.indexfile, _('no node'))
1547 1536 except AttributeError:
1548 1537 # Fall through to pure code
1549 1538 pass
1550 1539
1551 1540 if node == wdirid:
1552 1541 for length in range(minlength, 41):
1553 1542 prefix = hexnode[:length]
1554 1543 if isvalid(prefix):
1555 1544 return prefix
1556 1545
1557 1546 for length in range(minlength, 41):
1558 1547 prefix = hexnode[:length]
1559 1548 if isvalid(prefix):
1560 1549 return disambiguate(hexnode, length)
1561 1550
1562 1551 def cmp(self, node, text):
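Per the changeset summary ("shortest: move revnum-disambiguation out of revlog"), the check performed by the removed isrev() helper above is now expected to happen outside this class, presumably in the caller. A standalone sketch of that check (the helper name and the repo_len parameter are invented; repo_len stands for len(revlog)):

    # Hypothetical caller-side helper mirroring the removed isrev() logic:
    # a hex prefix is ambiguous with a revision number if it parses as a
    # plain integer, does not start with '0', and does not exceed the
    # number of revisions.
    def _isambiguouswithrevnum(repo_len, prefix):
        try:
            i = int(prefix)
        except ValueError:
            return False
        return prefix[0:1] != '0' and i <= repo_len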
1563 1552 """compare text with a given file revision
1564 1553
1565 1554 returns True if text is different than what is stored.
1566 1555 """
1567 1556 p1, p2 = self.parents(node)
1568 1557 return hash(text, p1, p2) != node
1569 1558
1570 1559 def _cachesegment(self, offset, data):
1571 1560 """Add a segment to the revlog cache.
1572 1561
1573 1562 Accepts an absolute offset and the data that is at that location.
1574 1563 """
1575 1564 o, d = self._chunkcache
1576 1565 # try to add to existing cache
1577 1566 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1578 1567 self._chunkcache = o, d + data
1579 1568 else:
1580 1569 self._chunkcache = offset, data
1581 1570
1582 1571 def _readsegment(self, offset, length, df=None):
1583 1572 """Load a segment of raw data from the revlog.
1584 1573
1585 1574 Accepts an absolute offset, length to read, and an optional existing
1586 1575 file handle to read from.
1587 1576
1588 1577 If an existing file handle is passed, it will be seeked and the
1589 1578 original seek position will NOT be restored.
1590 1579
1591 1580 Returns a str or buffer of raw byte data.
1592 1581 """
1593 1582 # Cache data both forward and backward around the requested
1594 1583 # data, in a fixed size window. This helps speed up operations
1595 1584 # involving reading the revlog backwards.
1596 1585 cachesize = self._chunkcachesize
1597 1586 realoffset = offset & ~(cachesize - 1)
1598 1587 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1599 1588 - realoffset)
1600 1589 with self._datareadfp(df) as df:
1601 1590 df.seek(realoffset)
1602 1591 d = df.read(reallength)
1603 1592 self._cachesegment(realoffset, d)
1604 1593 if offset != realoffset or reallength != length:
1605 1594 return util.buffer(d, offset - realoffset, length)
1606 1595 return d
1607 1596
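The realoffset/reallength arithmetic above rounds the requested byte range out to the chunk-cache alignment so that neighbouring reads hit the cache. A rough standalone sketch, assuming a 64 KiB cache chunk size (the numbers are illustrative only):

cachesize = 65536                      # assumed power-of-two chunk cache size
offset, length = 70000, 100            # illustrative request
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert realoffset == 65536 and reallength == 65536
# the widened window always covers the requested range
assert realoffset <= offset and offset + length <= realoffset + reallength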
1608 1597 def _getsegment(self, offset, length, df=None):
1609 1598 """Obtain a segment of raw data from the revlog.
1610 1599
1611 1600 Accepts an absolute offset, length of bytes to obtain, and an
1612 1601 optional file handle to the already-opened revlog. If the file
1613 1602 handle is used, its original seek position will not be preserved.
1614 1603
1615 1604 Requests for data may be returned from a cache.
1616 1605
1617 1606 Returns a str or a buffer instance of raw byte data.
1618 1607 """
1619 1608 o, d = self._chunkcache
1620 1609 l = len(d)
1621 1610
1622 1611 # is it in the cache?
1623 1612 cachestart = offset - o
1624 1613 cacheend = cachestart + length
1625 1614 if cachestart >= 0 and cacheend <= l:
1626 1615 if cachestart == 0 and cacheend == l:
1627 1616 return d # avoid a copy
1628 1617 return util.buffer(d, cachestart, cacheend - cachestart)
1629 1618
1630 1619 return self._readsegment(offset, length, df=df)
1631 1620
1632 1621 def _getsegmentforrevs(self, startrev, endrev, df=None):
1633 1622 """Obtain a segment of raw data corresponding to a range of revisions.
1634 1623
1635 1624 Accepts the start and end revisions and an optional already-open
1636 1625 file handle to be used for reading. If the file handle is read, its
1637 1626 seek position will not be preserved.
1638 1627
1639 1628 Requests for data may be satisfied by a cache.
1640 1629
1641 1630 Returns a 2-tuple of (offset, data) for the requested range of
1642 1631 revisions. Offset is the integer offset from the beginning of the
1643 1632 revlog and data is a str or buffer of the raw byte data.
1644 1633
1645 1634 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1646 1635 to determine where each revision's data begins and ends.
1647 1636 """
1648 1637 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1649 1638 # (functions are expensive).
1650 1639 index = self.index
1651 1640 istart = index[startrev]
1652 1641 start = int(istart[0] >> 16)
1653 1642 if startrev == endrev:
1654 1643 end = start + istart[1]
1655 1644 else:
1656 1645 iend = index[endrev]
1657 1646 end = int(iend[0] >> 16) + iend[1]
1658 1647
1659 1648 if self._inline:
1660 1649 start += (startrev + 1) * self._io.size
1661 1650 end += (endrev + 1) * self._io.size
1662 1651 length = end - start
1663 1652
1664 1653 return start, self._getsegment(start, length, df=df)
1665 1654
1666 1655 def _chunk(self, rev, df=None):
1667 1656 """Obtain a single decompressed chunk for a revision.
1668 1657
1669 1658 Accepts an integer revision and an optional already-open file handle
1670 1659 to be used for reading. If used, the seek position of the file will not
1671 1660 be preserved.
1672 1661
1673 1662 Returns a str holding uncompressed data for the requested revision.
1674 1663 """
1675 1664 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1676 1665
1677 1666 def _chunks(self, revs, df=None):
1678 1667 """Obtain decompressed chunks for the specified revisions.
1679 1668
1680 1669 Accepts an iterable of numeric revisions that are assumed to be in
1681 1670 ascending order. Also accepts an optional already-open file handle
1682 1671 to be used for reading. If used, the seek position of the file will
1683 1672 not be preserved.
1684 1673
1685 1674 This function is similar to calling ``self._chunk()`` multiple times,
1686 1675 but is faster.
1687 1676
1688 1677 Returns a list with decompressed data for each requested revision.
1689 1678 """
1690 1679 if not revs:
1691 1680 return []
1692 1681 start = self.start
1693 1682 length = self.length
1694 1683 inline = self._inline
1695 1684 iosize = self._io.size
1696 1685 buffer = util.buffer
1697 1686
1698 1687 l = []
1699 1688 ladd = l.append
1700 1689
1701 1690 if not self._withsparseread:
1702 1691 slicedchunks = (revs,)
1703 1692 else:
1704 1693 slicedchunks = _slicechunk(self, revs)
1705 1694
1706 1695 for revschunk in slicedchunks:
1707 1696 firstrev = revschunk[0]
1708 1697 # Skip trailing revisions with empty diff
1709 1698 for lastrev in revschunk[::-1]:
1710 1699 if length(lastrev) != 0:
1711 1700 break
1712 1701
1713 1702 try:
1714 1703 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1715 1704 except OverflowError:
1716 1705 # issue4215 - we can't cache a run of chunks greater than
1717 1706 # 2G on Windows
1718 1707 return [self._chunk(rev, df=df) for rev in revschunk]
1719 1708
1720 1709 decomp = self.decompress
1721 1710 for rev in revschunk:
1722 1711 chunkstart = start(rev)
1723 1712 if inline:
1724 1713 chunkstart += (rev + 1) * iosize
1725 1714 chunklength = length(rev)
1726 1715 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1727 1716
1728 1717 return l
1729 1718
1730 1719 def _chunkclear(self):
1731 1720 """Clear the raw chunk cache."""
1732 1721 self._chunkcache = (0, '')
1733 1722
1734 1723 def deltaparent(self, rev):
1735 1724 """return deltaparent of the given revision"""
1736 1725 base = self.index[rev][3]
1737 1726 if base == rev:
1738 1727 return nullrev
1739 1728 elif self._generaldelta:
1740 1729 return base
1741 1730 else:
1742 1731 return rev - 1
1743 1732
1744 1733 def revdiff(self, rev1, rev2):
1745 1734 """return or calculate a delta between two revisions
1746 1735
1747 1736 The delta calculated is in binary form and is intended to be written to
1748 1737 revlog data directly. So this function needs raw revision data.
1749 1738 """
1750 1739 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1751 1740 return bytes(self._chunk(rev2))
1752 1741
1753 1742 return mdiff.textdiff(self.revision(rev1, raw=True),
1754 1743 self.revision(rev2, raw=True))
1755 1744
1756 1745 def revision(self, nodeorrev, _df=None, raw=False):
1757 1746 """return an uncompressed revision of a given node or revision
1758 1747 number.
1759 1748
1760 1749 _df - an existing file handle to read from. (internal-only)
1761 1750 raw - an optional argument specifying if the revision data is to be
1762 1751 treated as raw data when applying flag transforms. 'raw' should be set
1763 1752 to True when generating changegroups or in debug commands.
1764 1753 """
1765 1754 if isinstance(nodeorrev, int):
1766 1755 rev = nodeorrev
1767 1756 node = self.node(rev)
1768 1757 else:
1769 1758 node = nodeorrev
1770 1759 rev = None
1771 1760
1772 1761 cachedrev = None
1773 1762 flags = None
1774 1763 rawtext = None
1775 1764 if node == nullid:
1776 1765 return ""
1777 1766 if self._cache:
1778 1767 if self._cache[0] == node:
1779 1768 # _cache only stores rawtext
1780 1769 if raw:
1781 1770 return self._cache[2]
1782 1771 # duplicated, but good for perf
1783 1772 if rev is None:
1784 1773 rev = self.rev(node)
1785 1774 if flags is None:
1786 1775 flags = self.flags(rev)
1787 1776 # no extra flags set, no flag processor runs, text = rawtext
1788 1777 if flags == REVIDX_DEFAULT_FLAGS:
1789 1778 return self._cache[2]
1790 1779 # rawtext is reusable. need to run flag processor
1791 1780 rawtext = self._cache[2]
1792 1781
1793 1782 cachedrev = self._cache[1]
1794 1783
1795 1784 # look up what we need to read
1796 1785 if rawtext is None:
1797 1786 if rev is None:
1798 1787 rev = self.rev(node)
1799 1788
1800 1789 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1801 1790 if stopped:
1802 1791 rawtext = self._cache[2]
1803 1792
1804 1793 # drop cache to save memory
1805 1794 self._cache = None
1806 1795
1807 1796 bins = self._chunks(chain, df=_df)
1808 1797 if rawtext is None:
1809 1798 rawtext = bytes(bins[0])
1810 1799 bins = bins[1:]
1811 1800
1812 1801 rawtext = mdiff.patches(rawtext, bins)
1813 1802 self._cache = (node, rev, rawtext)
1814 1803
1815 1804 if flags is None:
1816 1805 if rev is None:
1817 1806 rev = self.rev(node)
1818 1807 flags = self.flags(rev)
1819 1808
1820 1809 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1821 1810 if validatehash:
1822 1811 self.checkhash(text, node, rev=rev)
1823 1812
1824 1813 return text
1825 1814
1826 1815 def hash(self, text, p1, p2):
1827 1816 """Compute a node hash.
1828 1817
1829 1818 Available as a function so that subclasses can replace the hash
1830 1819 as needed.
1831 1820 """
1832 1821 return hash(text, p1, p2)
1833 1822
1834 1823 def _processflags(self, text, flags, operation, raw=False):
1835 1824 """Inspect revision data flags and applies transforms defined by
1836 1825 registered flag processors.
1837 1826
1838 1827 ``text`` - the revision data to process
1839 1828 ``flags`` - the revision flags
1840 1829 ``operation`` - the operation being performed (read or write)
1841 1830 ``raw`` - an optional argument describing if the raw transform should be
1842 1831 applied.
1843 1832
1844 1833 This method processes the flags in the order (or reverse order if
1845 1834 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1846 1835 flag processors registered for present flags. The order of flags defined
1847 1836 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1848 1837
1849 1838 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1850 1839 processed text and ``validatehash`` is a bool indicating whether the
1851 1840 returned text should be checked for hash integrity.
1852 1841
1853 1842 Note: If the ``raw`` argument is set, it has precedence over the
1854 1843 operation and will only update the value of ``validatehash``.
1855 1844 """
1856 1845 # fast path: no flag processors will run
1857 1846 if flags == 0:
1858 1847 return text, True
1859 1848 if operation not in ('read', 'write'):
1860 1849 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1861 1850 # Check all flags are known.
1862 1851 if flags & ~REVIDX_KNOWN_FLAGS:
1863 1852 raise RevlogError(_("incompatible revision flag '%#x'") %
1864 1853 (flags & ~REVIDX_KNOWN_FLAGS))
1865 1854 validatehash = True
1866 1855 # Depending on the operation (read or write), the order might be
1867 1856 # reversed due to non-commutative transforms.
1868 1857 orderedflags = REVIDX_FLAGS_ORDER
1869 1858 if operation == 'write':
1870 1859 orderedflags = reversed(orderedflags)
1871 1860
1872 1861 for flag in orderedflags:
1873 1862 # If a flagprocessor has been registered for a known flag, apply the
1874 1863 # related operation transform and update result tuple.
1875 1864 if flag & flags:
1876 1865 vhash = True
1877 1866
1878 1867 if flag not in _flagprocessors:
1879 1868 message = _("missing processor for flag '%#x'") % (flag)
1880 1869 raise RevlogError(message)
1881 1870
1882 1871 processor = _flagprocessors[flag]
1883 1872 if processor is not None:
1884 1873 readtransform, writetransform, rawtransform = processor
1885 1874
1886 1875 if raw:
1887 1876 vhash = rawtransform(self, text)
1888 1877 elif operation == 'read':
1889 1878 text, vhash = readtransform(self, text)
1890 1879 else: # write operation
1891 1880 text, vhash = writetransform(self, text)
1892 1881 validatehash = validatehash and vhash
1893 1882
1894 1883 return text, validatehash
1895 1884
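The loop above expects each registered processor to be a (readtransform, writetransform, rawtransform) triple, where the read and write transforms return (text, validatehash) and the raw transform returns only the validatehash flag. A hypothetical processor showing that shape (the transform names and behaviour are invented for illustration, not a real registered flag):

def _readxform(rl, text):
    # hypothetical read-side transform: strip NUL bytes, hash is still valid
    return text.replace(b'\0', b''), True

def _writexform(rl, text):
    return text, True

def _rawxform(rl, text):
    # rawtext for this hypothetical flag cannot be hash-checked
    return False

exampleprocessor = (_readxform, _writexform, _rawxform)
text, vhash = exampleprocessor[0](None, b'a\0b')
assert (text, vhash) == (b'ab', True)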
1896 1885 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1897 1886 """Check node hash integrity.
1898 1887
1899 1888 Available as a function so that subclasses can extend hash mismatch
1900 1889 behaviors as needed.
1901 1890 """
1902 1891 try:
1903 1892 if p1 is None and p2 is None:
1904 1893 p1, p2 = self.parents(node)
1905 1894 if node != self.hash(text, p1, p2):
1906 1895 revornode = rev
1907 1896 if revornode is None:
1908 1897 revornode = templatefilters.short(hex(node))
1909 1898 raise RevlogError(_("integrity check failed on %s:%s")
1910 1899 % (self.indexfile, pycompat.bytestr(revornode)))
1911 1900 except RevlogError:
1912 1901 if self._censorable and _censoredtext(text):
1913 1902 raise error.CensoredNodeError(self.indexfile, node, text)
1914 1903 raise
1915 1904
1916 1905 def _enforceinlinesize(self, tr, fp=None):
1917 1906 """Check if the revlog is too big for inline and convert if so.
1918 1907
1919 1908 This should be called after revisions are added to the revlog. If the
1920 1909 revlog has grown too large to be an inline revlog, it will convert it
1921 1910 to use multiple index and data files.
1922 1911 """
1923 1912 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1924 1913 return
1925 1914
1926 1915 trinfo = tr.find(self.indexfile)
1927 1916 if trinfo is None:
1928 1917 raise RevlogError(_("%s not found in the transaction")
1929 1918 % self.indexfile)
1930 1919
1931 1920 trindex = trinfo[2]
1932 1921 if trindex is not None:
1933 1922 dataoff = self.start(trindex)
1934 1923 else:
1935 1924 # revlog was stripped at start of transaction, use all leftover data
1936 1925 trindex = len(self) - 1
1937 1926 dataoff = self.end(-2)
1938 1927
1939 1928 tr.add(self.datafile, dataoff)
1940 1929
1941 1930 if fp:
1942 1931 fp.flush()
1943 1932 fp.close()
1944 1933
1945 1934 with self._datafp('w') as df:
1946 1935 for r in self:
1947 1936 df.write(self._getsegmentforrevs(r, r)[1])
1948 1937
1949 1938 with self._indexfp('w') as fp:
1950 1939 self.version &= ~FLAG_INLINE_DATA
1951 1940 self._inline = False
1952 1941 io = self._io
1953 1942 for i in self:
1954 1943 e = io.packentry(self.index[i], self.node, self.version, i)
1955 1944 fp.write(e)
1956 1945
1957 1946 # the temp file replaces the real index when we exit the context
1958 1947 # manager
1959 1948
1960 1949 tr.replace(self.indexfile, trindex * self._io.size)
1961 1950 self._chunkclear()
1962 1951
1963 1952 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1964 1953 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1965 1954 """add a revision to the log
1966 1955
1967 1956 text - the revision data to add
1968 1957 transaction - the transaction object used for rollback
1969 1958 link - the linkrev data to add
1970 1959 p1, p2 - the parent nodeids of the revision
1971 1960 cachedelta - an optional precomputed delta
1972 1961 node - nodeid of revision; typically node is not specified, and it is
1973 1962 computed by default as hash(text, p1, p2), however subclasses might
1974 1963 use different hashing method (and override checkhash() in such case)
1975 1964 flags - the known flags to set on the revision
1976 1965 deltacomputer - an optional _deltacomputer instance shared between
1977 1966 multiple calls
1978 1967 """
1979 1968 if link == nullrev:
1980 1969 raise RevlogError(_("attempted to add linkrev -1 to %s")
1981 1970 % self.indexfile)
1982 1971
1983 1972 if flags:
1984 1973 node = node or self.hash(text, p1, p2)
1985 1974
1986 1975 rawtext, validatehash = self._processflags(text, flags, 'write')
1987 1976
1988 1977 # If the flag processor modifies the revision data, ignore any provided
1989 1978 # cachedelta.
1990 1979 if rawtext != text:
1991 1980 cachedelta = None
1992 1981
1993 1982 if len(rawtext) > _maxentrysize:
1994 1983 raise RevlogError(
1995 1984 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1996 1985 % (self.indexfile, len(rawtext)))
1997 1986
1998 1987 node = node or self.hash(rawtext, p1, p2)
1999 1988 if node in self.nodemap:
2000 1989 return node
2001 1990
2002 1991 if validatehash:
2003 1992 self.checkhash(rawtext, node, p1=p1, p2=p2)
2004 1993
2005 1994 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
2006 1995 flags, cachedelta=cachedelta,
2007 1996 deltacomputer=deltacomputer)
2008 1997
2009 1998 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
2010 1999 cachedelta=None, deltacomputer=None):
2011 2000 """add a raw revision with known flags, node and parents
2012 2001 useful when reusing a revision not stored in this revlog (ex: received
2013 2002 over the wire, or read from an external bundle).
2014 2003 """
2015 2004 dfh = None
2016 2005 if not self._inline:
2017 2006 dfh = self._datafp("a+")
2018 2007 ifh = self._indexfp("a+")
2019 2008 try:
2020 2009 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2021 2010 flags, cachedelta, ifh, dfh,
2022 2011 deltacomputer=deltacomputer)
2023 2012 finally:
2024 2013 if dfh:
2025 2014 dfh.close()
2026 2015 ifh.close()
2027 2016
2028 2017 def compress(self, data):
2029 2018 """Generate a possibly-compressed representation of data."""
2030 2019 if not data:
2031 2020 return '', data
2032 2021
2033 2022 compressed = self._compressor.compress(data)
2034 2023
2035 2024 if compressed:
2036 2025 # The revlog compressor added the header in the returned data.
2037 2026 return '', compressed
2038 2027
2039 2028 if data[0:1] == '\0':
2040 2029 return '', data
2041 2030 return 'u', data
2042 2031
2043 2032 def decompress(self, data):
2044 2033 """Decompress a revlog chunk.
2045 2034
2046 2035 The chunk is expected to begin with a header identifying the
2047 2036 format type so it can be routed to an appropriate decompressor.
2048 2037 """
2049 2038 if not data:
2050 2039 return data
2051 2040
2052 2041 # Revlogs are read much more frequently than they are written and many
2053 2042 # chunks only take microseconds to decompress, so performance is
2054 2043 # important here.
2055 2044 #
2056 2045 # We can make a few assumptions about revlogs:
2057 2046 #
2058 2047 # 1) the majority of chunks will be compressed (as opposed to inline
2059 2048 # raw data).
2060 2049 # 2) decompressing *any* data will likely be at least 10x slower than
2061 2050 # returning raw inline data.
2062 2051 # 3) we want to prioritize common and officially supported compression
2063 2052 # engines
2064 2053 #
2065 2054 # It follows that we want to optimize for "decompress compressed data
2066 2055 # when encoded with common and officially supported compression engines"
2067 2056 # case over "raw data" and "data encoded by less common or non-official
2068 2057 # compression engines." That is why we have the inline lookup first
2069 2058 # followed by the compengines lookup.
2070 2059 #
2071 2060 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2072 2061 # compressed chunks. And this matters for changelog and manifest reads.
2073 2062 t = data[0:1]
2074 2063
2075 2064 if t == 'x':
2076 2065 try:
2077 2066 return _zlibdecompress(data)
2078 2067 except zlib.error as e:
2079 2068 raise RevlogError(_('revlog decompress error: %s') %
2080 2069 stringutil.forcebytestr(e))
2081 2070 # '\0' is more common than 'u' so it goes first.
2082 2071 elif t == '\0':
2083 2072 return data
2084 2073 elif t == 'u':
2085 2074 return util.buffer(data, 1)
2086 2075
2087 2076 try:
2088 2077 compressor = self._decompressors[t]
2089 2078 except KeyError:
2090 2079 try:
2091 2080 engine = util.compengines.forrevlogheader(t)
2092 2081 compressor = engine.revlogcompressor()
2093 2082 self._decompressors[t] = compressor
2094 2083 except KeyError:
2095 2084 raise RevlogError(_('unknown compression type %r') % t)
2096 2085
2097 2086 return compressor.decompress(data)
2098 2087
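A rough standalone sketch of the header routing described above (not the revlog object itself): 'u' marks literal data with the marker stripped, a leading '\0' or an empty chunk is returned as is, and 'x' is the usual zlib header byte:

import zlib

def sketch_decompress(data):
    t = data[0:1]
    if t == b'x':          # zlib stream; 'x' (0x78) is the common zlib header
        return zlib.decompress(data)
    if t == b'u':          # stored literally, strip the marker byte
        return data[1:]
    return data            # empty chunk or leading '\0': already plain

assert sketch_decompress(b'u' + b'hello') == b'hello'
assert sketch_decompress(zlib.compress(b'hello')) == b'hello'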
2099 2088 def _isgooddeltainfo(self, d, textlen):
2100 2089 """Returns True if the given delta is good. Good means that it is within
2101 2090 the disk span, disk size, and chain length bounds that we know to be
2102 2091 performant."""
2103 2092 if d is None:
2104 2093 return False
2105 2094
2106 2095 # - 'd.distance' is the distance from the base revision -- bounding it
2107 2096 # limits the amount of I/O we need to do.
2108 2097 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2109 2098 # need to apply -- bounding it limits the amount of CPU we consume.
2110 2099
2111 2100 defaultmax = textlen * 4
2112 2101 maxdist = self._maxdeltachainspan
2113 2102 if not maxdist:
2114 2103 maxdist = d.distance # ensure the conditional pass
2115 2104 maxdist = max(maxdist, defaultmax)
2116 2105 if (d.distance > maxdist or d.deltalen > textlen or
2117 2106 d.compresseddeltalen > textlen * 2 or
2118 2107 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2119 2108 return False
2120 2109
2121 2110 return True
2122 2111
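A worked example of the bounds above with illustrative numbers and no configured _maxdeltachainspan or _maxchainlen: the candidate delta is rejected because the deltas that would have to be applied add up to more than twice the text size:

textlen = 1000
distance, deltalen, compresseddeltalen = 3000, 400, 2500   # illustrative
maxdist = max(distance, textlen * 4)       # no span limit configured
rejected = (distance > maxdist or deltalen > textlen or
            compresseddeltalen > textlen * 2)
assert rejected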
2123 2112 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2124 2113 cachedelta, ifh, dfh, alwayscache=False,
2125 2114 deltacomputer=None):
2126 2115 """internal function to add revisions to the log
2127 2116
2128 2117 see addrevision for argument descriptions.
2129 2118
2130 2119 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2131 2120
2132 2121 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2133 2122 be used.
2134 2123
2135 2124 invariants:
2136 2125 - rawtext is optional (can be None); if not set, cachedelta must be set.
2137 2126 if both are set, they must correspond to each other.
2138 2127 """
2139 2128 if node == nullid:
2140 2129 raise RevlogError(_("%s: attempt to add null revision") %
2141 2130 (self.indexfile))
2142 2131 if node == wdirid or node in wdirfilenodeids:
2143 2132 raise RevlogError(_("%s: attempt to add wdir revision") %
2144 2133 (self.indexfile))
2145 2134
2146 2135 if self._inline:
2147 2136 fh = ifh
2148 2137 else:
2149 2138 fh = dfh
2150 2139
2151 2140 btext = [rawtext]
2152 2141
2153 2142 curr = len(self)
2154 2143 prev = curr - 1
2155 2144 offset = self.end(prev)
2156 2145 p1r, p2r = self.rev(p1), self.rev(p2)
2157 2146
2158 2147 # full versions are inserted when the needed deltas
2159 2148 # become comparable to the uncompressed text
2160 2149 if rawtext is None:
2161 2150 # need rawtext size, before changed by flag processors, which is
2162 2151 # the non-raw size. use revlog explicitly to avoid filelog's extra
2163 2152 # logic that might remove metadata size.
2164 2153 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2165 2154 cachedelta[1])
2166 2155 else:
2167 2156 textlen = len(rawtext)
2168 2157
2169 2158 if deltacomputer is None:
2170 2159 deltacomputer = _deltacomputer(self)
2171 2160
2172 2161 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2173 2162
2174 2163 # no delta for flag processor revision (see "candelta" for why)
2175 2164 # not calling candelta since only one revision needs to be tested, also to
2176 2165 # avoid overhead fetching flags again.
2177 2166 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2178 2167 deltainfo = None
2179 2168 else:
2180 2169 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2181 2170
2182 2171 if deltainfo is not None:
2183 2172 base = deltainfo.base
2184 2173 chainbase = deltainfo.chainbase
2185 2174 data = deltainfo.data
2186 2175 l = deltainfo.deltalen
2187 2176 else:
2188 2177 rawtext = deltacomputer.buildtext(revinfo, fh)
2189 2178 data = self.compress(rawtext)
2190 2179 l = len(data[1]) + len(data[0])
2191 2180 base = chainbase = curr
2192 2181
2193 2182 e = (offset_type(offset, flags), l, textlen,
2194 2183 base, link, p1r, p2r, node)
2195 2184 self.index.insert(-1, e)
2196 2185 self.nodemap[node] = curr
2197 2186
2198 2187 entry = self._io.packentry(e, self.node, self.version, curr)
2199 2188 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2200 2189
2201 2190 if alwayscache and rawtext is None:
2202 2191 rawtext = deltacomputer._buildtext(revinfo, fh)
2203 2192
2204 2193 if type(rawtext) == bytes: # only accept immutable objects
2205 2194 self._cache = (node, curr, rawtext)
2206 2195 self._chainbasecache[curr] = chainbase
2207 2196 return node
2208 2197
2209 2198 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2210 2199 # Files opened in a+ mode have inconsistent behavior on various
2211 2200 # platforms. Windows requires that a file positioning call be made
2212 2201 # when the file handle transitions between reads and writes. See
2213 2202 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2214 2203 # platforms, Python or the platform itself can be buggy. Some versions
2215 2204 # of Solaris have been observed to not append at the end of the file
2216 2205 # if the file was seeked to before the end. See issue4943 for more.
2217 2206 #
2218 2207 # We work around this issue by inserting a seek() before writing.
2219 2208 # Note: This is likely not necessary on Python 3.
2220 2209 ifh.seek(0, os.SEEK_END)
2221 2210 if dfh:
2222 2211 dfh.seek(0, os.SEEK_END)
2223 2212
2224 2213 curr = len(self) - 1
2225 2214 if not self._inline:
2226 2215 transaction.add(self.datafile, offset)
2227 2216 transaction.add(self.indexfile, curr * len(entry))
2228 2217 if data[0]:
2229 2218 dfh.write(data[0])
2230 2219 dfh.write(data[1])
2231 2220 ifh.write(entry)
2232 2221 else:
2233 2222 offset += curr * self._io.size
2234 2223 transaction.add(self.indexfile, offset, curr)
2235 2224 ifh.write(entry)
2236 2225 ifh.write(data[0])
2237 2226 ifh.write(data[1])
2238 2227 self._enforceinlinesize(transaction, ifh)
2239 2228
2240 2229 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2241 2230 """
2242 2231 add a delta group
2243 2232
2244 2233 given a set of deltas, add them to the revision log. the
2245 2234 first delta is against its parent, which should be in our
2246 2235 log, the rest are against the previous delta.
2247 2236
2248 2237 If ``addrevisioncb`` is defined, it will be called with arguments of
2249 2238 this revlog and the node that was added.
2250 2239 """
2251 2240
2252 2241 nodes = []
2253 2242
2254 2243 r = len(self)
2255 2244 end = 0
2256 2245 if r:
2257 2246 end = self.end(r - 1)
2258 2247 ifh = self._indexfp("a+")
2259 2248 isize = r * self._io.size
2260 2249 if self._inline:
2261 2250 transaction.add(self.indexfile, end + isize, r)
2262 2251 dfh = None
2263 2252 else:
2264 2253 transaction.add(self.indexfile, isize, r)
2265 2254 transaction.add(self.datafile, end)
2266 2255 dfh = self._datafp("a+")
2267 2256 def flush():
2268 2257 if dfh:
2269 2258 dfh.flush()
2270 2259 ifh.flush()
2271 2260 try:
2272 2261 deltacomputer = _deltacomputer(self)
2273 2262 # loop through our set of deltas
2274 2263 for data in deltas:
2275 2264 node, p1, p2, linknode, deltabase, delta, flags = data
2276 2265 link = linkmapper(linknode)
2277 2266 flags = flags or REVIDX_DEFAULT_FLAGS
2278 2267
2279 2268 nodes.append(node)
2280 2269
2281 2270 if node in self.nodemap:
2282 2271 # this can happen if two branches make the same change
2283 2272 continue
2284 2273
2285 2274 for p in (p1, p2):
2286 2275 if p not in self.nodemap:
2287 2276 raise LookupError(p, self.indexfile,
2288 2277 _('unknown parent'))
2289 2278
2290 2279 if deltabase not in self.nodemap:
2291 2280 raise LookupError(deltabase, self.indexfile,
2292 2281 _('unknown delta base'))
2293 2282
2294 2283 baserev = self.rev(deltabase)
2295 2284
2296 2285 if baserev != nullrev and self.iscensored(baserev):
2297 2286 # if base is censored, delta must be full replacement in a
2298 2287 # single patch operation
2299 2288 hlen = struct.calcsize(">lll")
2300 2289 oldlen = self.rawsize(baserev)
2301 2290 newlen = len(delta) - hlen
2302 2291 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2303 2292 raise error.CensoredBaseError(self.indexfile,
2304 2293 self.node(baserev))
2305 2294
2306 2295 if not flags and self._peek_iscensored(baserev, delta, flush):
2307 2296 flags |= REVIDX_ISCENSORED
2308 2297
2309 2298 # We assume consumers of addrevisioncb will want to retrieve
2310 2299 # the added revision, which will require a call to
2311 2300 # revision(). revision() will fast path if there is a cache
2312 2301 # hit. So, we tell _addrevision() to always cache in this case.
2313 2302 # We're only using addgroup() in the context of changegroup
2314 2303 # generation so the revision data can always be handled as raw
2315 2304 # by the flagprocessor.
2316 2305 self._addrevision(node, None, transaction, link,
2317 2306 p1, p2, flags, (baserev, delta),
2318 2307 ifh, dfh,
2319 2308 alwayscache=bool(addrevisioncb),
2320 2309 deltacomputer=deltacomputer)
2321 2310
2322 2311 if addrevisioncb:
2323 2312 addrevisioncb(self, node)
2324 2313
2325 2314 if not dfh and not self._inline:
2326 2315 # addrevision switched from inline to conventional
2327 2316 # reopen the index
2328 2317 ifh.close()
2329 2318 dfh = self._datafp("a+")
2330 2319 ifh = self._indexfp("a+")
2331 2320 finally:
2332 2321 if dfh:
2333 2322 dfh.close()
2334 2323 ifh.close()
2335 2324
2336 2325 return nodes
2337 2326
2338 2327 def iscensored(self, rev):
2339 2328 """Check if a file revision is censored."""
2340 2329 if not self._censorable:
2341 2330 return False
2342 2331
2343 2332 return self.flags(rev) & REVIDX_ISCENSORED
2344 2333
2345 2334 def _peek_iscensored(self, baserev, delta, flush):
2346 2335 """Quickly check if a delta produces a censored revision."""
2347 2336 if not self._censorable:
2348 2337 return False
2349 2338
2350 2339 # Fragile heuristic: unless new file meta keys are added alphabetically
2351 2340 # preceding "censored", all censored revisions are prefixed by
2352 2341 # "\1\ncensored:". A delta producing such a censored revision must be a
2353 2342 # full-replacement delta, so we inspect the first and only patch in the
2354 2343 # delta for this prefix.
2355 2344 hlen = struct.calcsize(">lll")
2356 2345 if len(delta) <= hlen:
2357 2346 return False
2358 2347
2359 2348 oldlen = self.rawsize(baserev)
2360 2349 newlen = len(delta) - hlen
2361 2350 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2362 2351 return False
2363 2352
2364 2353 add = "\1\ncensored:"
2365 2354 addlen = len(add)
2366 2355 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2367 2356
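A rough standalone sketch of the heuristic above: a full-replacement delta whose new content starts with the censored-metadata marker (the lengths and reason text are illustrative):

import struct

oldlen = 42
newtext = b'\1\ncensored: reason\1\n'
delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
hlen = struct.calcsize(">lll")
marker = b'\1\ncensored:'
assert len(delta) > hlen
assert delta[hlen:hlen + len(marker)] == marker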
2368 2357 def getstrippoint(self, minlink):
2369 2358 """find the minimum rev that must be stripped to strip the linkrev
2370 2359
2371 2360 Returns a tuple containing the minimum rev and a set of all revs that
2372 2361 have linkrevs that will be broken by this strip.
2373 2362 """
2374 2363 brokenrevs = set()
2375 2364 strippoint = len(self)
2376 2365
2377 2366 heads = {}
2378 2367 futurelargelinkrevs = set()
2379 2368 for head in self.headrevs():
2380 2369 headlinkrev = self.linkrev(head)
2381 2370 heads[head] = headlinkrev
2382 2371 if headlinkrev >= minlink:
2383 2372 futurelargelinkrevs.add(headlinkrev)
2384 2373
2385 2374 # This algorithm involves walking down the rev graph, starting at the
2386 2375 # heads. Since the revs are topologically sorted according to linkrev,
2387 2376 # once all head linkrevs are below the minlink, we know there are
2388 2377 # no more revs that could have a linkrev greater than minlink.
2389 2378 # So we can stop walking.
2390 2379 while futurelargelinkrevs:
2391 2380 strippoint -= 1
2392 2381 linkrev = heads.pop(strippoint)
2393 2382
2394 2383 if linkrev < minlink:
2395 2384 brokenrevs.add(strippoint)
2396 2385 else:
2397 2386 futurelargelinkrevs.remove(linkrev)
2398 2387
2399 2388 for p in self.parentrevs(strippoint):
2400 2389 if p != nullrev:
2401 2390 plinkrev = self.linkrev(p)
2402 2391 heads[p] = plinkrev
2403 2392 if plinkrev >= minlink:
2404 2393 futurelargelinkrevs.add(plinkrev)
2405 2394
2406 2395 return strippoint, brokenrevs
2407 2396
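The real walk starts at the heads and descends the DAG, also collecting revisions whose linkrevs would be broken; a toy linear history is enough to show the stopping condition (the linkrev values are illustrative):

linkrevs = [0, 2, 2, 5, 7]   # linkrev of each filelog revision, oldest first
minlink = 5
strippoint = len(linkrevs)
while strippoint and linkrevs[strippoint - 1] >= minlink:
    strippoint -= 1
assert strippoint == 3       # revisions 3 and 4 have linkrev >= 5 and are stripped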
2408 2397 def strip(self, minlink, transaction):
2409 2398 """truncate the revlog on the first revision with a linkrev >= minlink
2410 2399
2411 2400 This function is called when we're stripping revision minlink and
2412 2401 its descendants from the repository.
2413 2402
2414 2403 We have to remove all revisions with linkrev >= minlink, because
2415 2404 the equivalent changelog revisions will be renumbered after the
2416 2405 strip.
2417 2406
2418 2407 So we truncate the revlog on the first of these revisions, and
2419 2408 trust that the caller has saved the revisions that shouldn't be
2420 2409 removed and that it'll re-add them after this truncation.
2421 2410 """
2422 2411 if len(self) == 0:
2423 2412 return
2424 2413
2425 2414 rev, _ = self.getstrippoint(minlink)
2426 2415 if rev == len(self):
2427 2416 return
2428 2417
2429 2418 # first truncate the files on disk
2430 2419 end = self.start(rev)
2431 2420 if not self._inline:
2432 2421 transaction.add(self.datafile, end)
2433 2422 end = rev * self._io.size
2434 2423 else:
2435 2424 end += rev * self._io.size
2436 2425
2437 2426 transaction.add(self.indexfile, end)
2438 2427
2439 2428 # then reset internal state in memory to forget those revisions
2440 2429 self._cache = None
2441 2430 self._chaininfocache = {}
2442 2431 self._chunkclear()
2443 2432 for x in xrange(rev, len(self)):
2444 2433 del self.nodemap[self.node(x)]
2445 2434
2446 2435 del self.index[rev:-1]
2447 2436 self._nodepos = None
2448 2437
2449 2438 def checksize(self):
2450 2439 expected = 0
2451 2440 if len(self):
2452 2441 expected = max(0, self.end(len(self) - 1))
2453 2442
2454 2443 try:
2455 2444 with self._datafp() as f:
2456 2445 f.seek(0, 2)
2457 2446 actual = f.tell()
2458 2447 dd = actual - expected
2459 2448 except IOError as inst:
2460 2449 if inst.errno != errno.ENOENT:
2461 2450 raise
2462 2451 dd = 0
2463 2452
2464 2453 try:
2465 2454 f = self.opener(self.indexfile)
2466 2455 f.seek(0, 2)
2467 2456 actual = f.tell()
2468 2457 f.close()
2469 2458 s = self._io.size
2470 2459 i = max(0, actual // s)
2471 2460 di = actual - (i * s)
2472 2461 if self._inline:
2473 2462 databytes = 0
2474 2463 for r in self:
2475 2464 databytes += max(0, self.length(r))
2476 2465 dd = 0
2477 2466 di = actual - len(self) * s - databytes
2478 2467 except IOError as inst:
2479 2468 if inst.errno != errno.ENOENT:
2480 2469 raise
2481 2470 di = 0
2482 2471
2483 2472 return (dd, di)
2484 2473
2485 2474 def files(self):
2486 2475 res = [self.indexfile]
2487 2476 if not self._inline:
2488 2477 res.append(self.datafile)
2489 2478 return res
2490 2479
2491 2480 DELTAREUSEALWAYS = 'always'
2492 2481 DELTAREUSESAMEREVS = 'samerevs'
2493 2482 DELTAREUSENEVER = 'never'
2494 2483
2495 2484 DELTAREUSEFULLADD = 'fulladd'
2496 2485
2497 2486 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2498 2487
2499 2488 def clone(self, tr, destrevlog, addrevisioncb=None,
2500 2489 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2501 2490 """Copy this revlog to another, possibly with format changes.
2502 2491
2503 2492 The destination revlog will contain the same revisions and nodes.
2504 2493 However, it may not be bit-for-bit identical due to e.g. delta encoding
2505 2494 differences.
2506 2495
2507 2496 The ``deltareuse`` argument control how deltas from the existing revlog
2508 2497 are preserved in the destination revlog. The argument can have the
2509 2498 following values:
2510 2499
2511 2500 DELTAREUSEALWAYS
2512 2501 Deltas will always be reused (if possible), even if the destination
2513 2502 revlog would not select the same revisions for the delta. This is the
2514 2503 fastest mode of operation.
2515 2504 DELTAREUSESAMEREVS
2516 2505 Deltas will be reused if the destination revlog would pick the same
2517 2506 revisions for the delta. This mode strikes a balance between speed
2518 2507 and optimization.
2519 2508 DELTAREUSENEVER
2520 2509 Deltas will never be reused. This is the slowest mode of execution.
2521 2510 This mode can be used to recompute deltas (e.g. if the diff/delta
2522 2511 algorithm changes).
2523 2512
2524 2513 Delta computation can be slow, so the choice of delta reuse policy can
2525 2514 significantly affect run time.
2526 2515
2527 2516 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2528 2517 two extremes. Deltas will be reused if they are appropriate. But if the
2529 2518 delta could choose a better revision, it will do so. This means if you
2530 2519 are converting a non-generaldelta revlog to a generaldelta revlog,
2531 2520 deltas will be recomputed if the delta's parent isn't a parent of the
2532 2521 revision.
2533 2522
2534 2523 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2535 2524 controls whether to compute deltas against both parents for merges.
2536 2525 If not specified, the destination revlog's current setting is kept.
2537 2526 """
2538 2527 if deltareuse not in self.DELTAREUSEALL:
2539 2528 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2540 2529
2541 2530 if len(destrevlog):
2542 2531 raise ValueError(_('destination revlog is not empty'))
2543 2532
2544 2533 if getattr(self, 'filteredrevs', None):
2545 2534 raise ValueError(_('source revlog has filtered revisions'))
2546 2535 if getattr(destrevlog, 'filteredrevs', None):
2547 2536 raise ValueError(_('destination revlog has filtered revisions'))
2548 2537
2549 2538 # lazydeltabase controls whether to reuse a cached delta, if possible.
2550 2539 oldlazydeltabase = destrevlog._lazydeltabase
2551 2540 oldamd = destrevlog._aggressivemergedeltas
2552 2541
2553 2542 try:
2554 2543 if deltareuse == self.DELTAREUSEALWAYS:
2555 2544 destrevlog._lazydeltabase = True
2556 2545 elif deltareuse == self.DELTAREUSESAMEREVS:
2557 2546 destrevlog._lazydeltabase = False
2558 2547
2559 2548 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2560 2549
2561 2550 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2562 2551 self.DELTAREUSESAMEREVS)
2563 2552
2564 2553 deltacomputer = _deltacomputer(destrevlog)
2565 2554 index = self.index
2566 2555 for rev in self:
2567 2556 entry = index[rev]
2568 2557
2569 2558 # Some classes override linkrev to take filtered revs into
2570 2559 # account. Use raw entry from index.
2571 2560 flags = entry[0] & 0xffff
2572 2561 linkrev = entry[4]
2573 2562 p1 = index[entry[5]][7]
2574 2563 p2 = index[entry[6]][7]
2575 2564 node = entry[7]
2576 2565
2577 2566 # (Possibly) reuse the delta from the revlog if allowed and
2578 2567 # the revlog chunk is a delta.
2579 2568 cachedelta = None
2580 2569 rawtext = None
2581 2570 if populatecachedelta:
2582 2571 dp = self.deltaparent(rev)
2583 2572 if dp != nullrev:
2584 2573 cachedelta = (dp, bytes(self._chunk(rev)))
2585 2574
2586 2575 if not cachedelta:
2587 2576 rawtext = self.revision(rev, raw=True)
2588 2577
2589 2578
2590 2579 if deltareuse == self.DELTAREUSEFULLADD:
2591 2580 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2592 2581 cachedelta=cachedelta,
2593 2582 node=node, flags=flags,
2594 2583 deltacomputer=deltacomputer)
2595 2584 else:
2596 2585 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2597 2586 checkambig=False)
2598 2587 dfh = None
2599 2588 if not destrevlog._inline:
2600 2589 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2601 2590 try:
2602 2591 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2603 2592 p2, flags, cachedelta, ifh, dfh,
2604 2593 deltacomputer=deltacomputer)
2605 2594 finally:
2606 2595 if dfh:
2607 2596 dfh.close()
2608 2597 ifh.close()
2609 2598
2610 2599 if addrevisioncb:
2611 2600 addrevisioncb(self, rev, node)
2612 2601 finally:
2613 2602 destrevlog._lazydeltabase = oldlazydeltabase
2614 2603 destrevlog._aggressivemergedeltas = oldamd
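A hedged usage sketch of the deltareuse policies documented in the clone() docstring above; srcrevlog, destrevlog and tr stand in for an existing revlog, an empty destination revlog and an open transaction:

# assumed objects: srcrevlog (populated revlog), destrevlog (empty), tr (transaction)
srcrevlog.clone(tr, destrevlog,
                deltareuse=srcrevlog.DELTAREUSENEVER,   # recompute every delta
                aggressivemergedeltas=True)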
@@ -1,1565 +1,1587 b''
1 1 # scmutil.py - Mercurial core utility functions
2 2 #
3 3 # Copyright Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import glob
12 12 import hashlib
13 13 import os
14 14 import re
15 15 import socket
16 16 import subprocess
17 17 import weakref
18 18
19 19 from .i18n import _
20 20 from .node import (
21 21 bin,
22 22 hex,
23 23 nullid,
24 24 short,
25 25 wdirid,
26 26 wdirrev,
27 27 )
28 28
29 29 from . import (
30 30 encoding,
31 31 error,
32 32 match as matchmod,
33 33 obsolete,
34 34 obsutil,
35 35 pathutil,
36 36 phases,
37 37 pycompat,
38 38 revsetlang,
39 39 similar,
40 40 url,
41 41 util,
42 42 vfs,
43 43 )
44 44
45 45 from .utils import (
46 46 procutil,
47 47 stringutil,
48 48 )
49 49
50 50 if pycompat.iswindows:
51 51 from . import scmwindows as scmplatform
52 52 else:
53 53 from . import scmposix as scmplatform
54 54
55 55 termsize = scmplatform.termsize
56 56
57 57 class status(tuple):
58 58 '''Named tuple with a list of files per status. The 'deleted', 'unknown'
59 59 and 'ignored' properties are only relevant to the working copy.
60 60 '''
61 61
62 62 __slots__ = ()
63 63
64 64 def __new__(cls, modified, added, removed, deleted, unknown, ignored,
65 65 clean):
66 66 return tuple.__new__(cls, (modified, added, removed, deleted, unknown,
67 67 ignored, clean))
68 68
69 69 @property
70 70 def modified(self):
71 71 '''files that have been modified'''
72 72 return self[0]
73 73
74 74 @property
75 75 def added(self):
76 76 '''files that have been added'''
77 77 return self[1]
78 78
79 79 @property
80 80 def removed(self):
81 81 '''files that have been removed'''
82 82 return self[2]
83 83
84 84 @property
85 85 def deleted(self):
86 86 '''files that are in the dirstate, but have been deleted from the
87 87 working copy (aka "missing")
88 88 '''
89 89 return self[3]
90 90
91 91 @property
92 92 def unknown(self):
93 93 '''files not in the dirstate that are not ignored'''
94 94 return self[4]
95 95
96 96 @property
97 97 def ignored(self):
98 98 '''files not in the dirstate that are ignored (by _dirignore())'''
99 99 return self[5]
100 100
101 101 @property
102 102 def clean(self):
103 103 '''files that have not been modified'''
104 104 return self[6]
105 105
106 106 def __repr__(self, *args, **kwargs):
107 107 return ((r'<status modified=%s, added=%s, removed=%s, deleted=%s, '
108 108 r'unknown=%s, ignored=%s, clean=%s>') %
109 109 tuple(pycompat.sysstr(stringutil.pprint(v)) for v in self))
110 110
111 111 def itersubrepos(ctx1, ctx2):
112 112 """find subrepos in ctx1 or ctx2"""
113 113 # Create a (subpath, ctx) mapping where we prefer subpaths from
114 114 # ctx1. The subpaths from ctx2 are important when the .hgsub file
115 115 # has been modified (in ctx2) but not yet committed (in ctx1).
116 116 subpaths = dict.fromkeys(ctx2.substate, ctx2)
117 117 subpaths.update(dict.fromkeys(ctx1.substate, ctx1))
118 118
119 119 missing = set()
120 120
121 121 for subpath in ctx2.substate:
122 122 if subpath not in ctx1.substate:
123 123 del subpaths[subpath]
124 124 missing.add(subpath)
125 125
126 126 for subpath, ctx in sorted(subpaths.iteritems()):
127 127 yield subpath, ctx.sub(subpath)
128 128
129 129 # Yield an empty subrepo based on ctx1 for anything only in ctx2. That way,
130 130 # status and diff will have an accurate result when it does
131 131 # 'sub.{status|diff}(rev2)'. Otherwise, the ctx2 subrepo is compared
132 132 # against itself.
133 133 for subpath in missing:
134 134 yield subpath, ctx2.nullsub(subpath, ctx1)
135 135
136 136 def nochangesfound(ui, repo, excluded=None):
137 137 '''Report no changes for push/pull, excluded is None or a list of
138 138 nodes excluded from the push/pull.
139 139 '''
140 140 secretlist = []
141 141 if excluded:
142 142 for n in excluded:
143 143 ctx = repo[n]
144 144 if ctx.phase() >= phases.secret and not ctx.extinct():
145 145 secretlist.append(n)
146 146
147 147 if secretlist:
148 148 ui.status(_("no changes found (ignored %d secret changesets)\n")
149 149 % len(secretlist))
150 150 else:
151 151 ui.status(_("no changes found\n"))
152 152
153 153 def callcatch(ui, func):
154 154 """call func() with global exception handling
155 155
156 156 return func() if no exception happens. otherwise do some error handling
157 157 and return an exit code accordingly. does not handle all exceptions.
158 158 """
159 159 try:
160 160 try:
161 161 return func()
162 162 except: # re-raises
163 163 ui.traceback()
164 164 raise
165 165 # Global exception handling, alphabetically
166 166 # Mercurial-specific first, followed by built-in and library exceptions
167 167 except error.LockHeld as inst:
168 168 if inst.errno == errno.ETIMEDOUT:
169 169 reason = _('timed out waiting for lock held by %r') % inst.locker
170 170 else:
171 171 reason = _('lock held by %r') % inst.locker
172 172 ui.warn(_("abort: %s: %s\n")
173 173 % (inst.desc or stringutil.forcebytestr(inst.filename), reason))
174 174 if not inst.locker:
175 175 ui.warn(_("(lock might be very busy)\n"))
176 176 except error.LockUnavailable as inst:
177 177 ui.warn(_("abort: could not lock %s: %s\n") %
178 178 (inst.desc or stringutil.forcebytestr(inst.filename),
179 179 encoding.strtolocal(inst.strerror)))
180 180 except error.OutOfBandError as inst:
181 181 if inst.args:
182 182 msg = _("abort: remote error:\n")
183 183 else:
184 184 msg = _("abort: remote error\n")
185 185 ui.warn(msg)
186 186 if inst.args:
187 187 ui.warn(''.join(inst.args))
188 188 if inst.hint:
189 189 ui.warn('(%s)\n' % inst.hint)
190 190 except error.RepoError as inst:
191 191 ui.warn(_("abort: %s!\n") % inst)
192 192 if inst.hint:
193 193 ui.warn(_("(%s)\n") % inst.hint)
194 194 except error.ResponseError as inst:
195 195 ui.warn(_("abort: %s") % inst.args[0])
196 196 msg = inst.args[1]
197 197 if isinstance(msg, type(u'')):
198 198 msg = pycompat.sysbytes(msg)
199 199 if not isinstance(msg, bytes):
200 200 ui.warn(" %r\n" % (msg,))
201 201 elif not msg:
202 202 ui.warn(_(" empty string\n"))
203 203 else:
204 204 ui.warn("\n%r\n" % stringutil.ellipsis(msg))
205 205 except error.CensoredNodeError as inst:
206 206 ui.warn(_("abort: file censored %s!\n") % inst)
207 207 except error.RevlogError as inst:
208 208 ui.warn(_("abort: %s!\n") % inst)
209 209 except error.InterventionRequired as inst:
210 210 ui.warn("%s\n" % inst)
211 211 if inst.hint:
212 212 ui.warn(_("(%s)\n") % inst.hint)
213 213 return 1
214 214 except error.WdirUnsupported:
215 215 ui.warn(_("abort: working directory revision cannot be specified\n"))
216 216 except error.Abort as inst:
217 217 ui.warn(_("abort: %s\n") % inst)
218 218 if inst.hint:
219 219 ui.warn(_("(%s)\n") % inst.hint)
220 220 except ImportError as inst:
221 221 ui.warn(_("abort: %s!\n") % stringutil.forcebytestr(inst))
222 222 m = stringutil.forcebytestr(inst).split()[-1]
223 223 if m in "mpatch bdiff".split():
224 224 ui.warn(_("(did you forget to compile extensions?)\n"))
225 225 elif m in "zlib".split():
226 226 ui.warn(_("(is your Python install correct?)\n"))
227 227 except IOError as inst:
228 228 if util.safehasattr(inst, "code"):
229 229 ui.warn(_("abort: %s\n") % stringutil.forcebytestr(inst))
230 230 elif util.safehasattr(inst, "reason"):
231 231 try: # usually it is in the form (errno, strerror)
232 232 reason = inst.reason.args[1]
233 233 except (AttributeError, IndexError):
234 234 # it might be anything, for example a string
235 235 reason = inst.reason
236 236 if isinstance(reason, unicode):
237 237 # SSLError of Python 2.7.9 contains a unicode
238 238 reason = encoding.unitolocal(reason)
239 239 ui.warn(_("abort: error: %s\n") % reason)
240 240 elif (util.safehasattr(inst, "args")
241 241 and inst.args and inst.args[0] == errno.EPIPE):
242 242 pass
243 243 elif getattr(inst, "strerror", None):
244 244 if getattr(inst, "filename", None):
245 245 ui.warn(_("abort: %s: %s\n") % (
246 246 encoding.strtolocal(inst.strerror),
247 247 stringutil.forcebytestr(inst.filename)))
248 248 else:
249 249 ui.warn(_("abort: %s\n") % encoding.strtolocal(inst.strerror))
250 250 else:
251 251 raise
252 252 except OSError as inst:
253 253 if getattr(inst, "filename", None) is not None:
254 254 ui.warn(_("abort: %s: '%s'\n") % (
255 255 encoding.strtolocal(inst.strerror),
256 256 stringutil.forcebytestr(inst.filename)))
257 257 else:
258 258 ui.warn(_("abort: %s\n") % encoding.strtolocal(inst.strerror))
259 259 except MemoryError:
260 260 ui.warn(_("abort: out of memory\n"))
261 261 except SystemExit as inst:
262 262 # Commands shouldn't sys.exit directly, but give a return code.
263 263 # Just in case, catch this and pass exit code to caller.
264 264 return inst.code
265 265 except socket.error as inst:
266 266 ui.warn(_("abort: %s\n") % stringutil.forcebytestr(inst.args[-1]))
267 267
268 268 return -1
269 269
270 270 def checknewlabel(repo, lbl, kind):
271 271 # Do not use the "kind" parameter in ui output.
272 272 # It makes strings difficult to translate.
273 273 if lbl in ['tip', '.', 'null']:
274 274 raise error.Abort(_("the name '%s' is reserved") % lbl)
275 275 for c in (':', '\0', '\n', '\r'):
276 276 if c in lbl:
277 277 raise error.Abort(
278 278 _("%r cannot be used in a name") % pycompat.bytestr(c))
279 279 try:
280 280 int(lbl)
281 281 raise error.Abort(_("cannot use an integer as a name"))
282 282 except ValueError:
283 283 pass
284 284 if lbl.strip() != lbl:
285 285 raise error.Abort(_("leading or trailing whitespace in name %r") % lbl)
286 286
287 287 def checkfilename(f):
288 288 '''Check that the filename f is an acceptable filename for a tracked file'''
289 289 if '\r' in f or '\n' in f:
290 290 raise error.Abort(_("'\\n' and '\\r' disallowed in filenames: %r") % f)
291 291
292 292 def checkportable(ui, f):
293 293 '''Check if filename f is portable and warn or abort depending on config'''
294 294 checkfilename(f)
295 295 abort, warn = checkportabilityalert(ui)
296 296 if abort or warn:
297 297 msg = util.checkwinfilename(f)
298 298 if msg:
299 299 msg = "%s: %s" % (msg, procutil.shellquote(f))
300 300 if abort:
301 301 raise error.Abort(msg)
302 302 ui.warn(_("warning: %s\n") % msg)
303 303
304 304 def checkportabilityalert(ui):
305 305 '''check if the user's config requests nothing, a warning, or abort for
306 306 non-portable filenames'''
307 307 val = ui.config('ui', 'portablefilenames')
308 308 lval = val.lower()
309 309 bval = stringutil.parsebool(val)
310 310 abort = pycompat.iswindows or lval == 'abort'
311 311 warn = bval or lval == 'warn'
312 312 if bval is None and not (warn or abort or lval == 'ignore'):
313 313 raise error.ConfigError(
314 314 _("ui.portablefilenames value is invalid ('%s')") % val)
315 315 return abort, warn
316 316
317 317 class casecollisionauditor(object):
318 318 def __init__(self, ui, abort, dirstate):
319 319 self._ui = ui
320 320 self._abort = abort
321 321 allfiles = '\0'.join(dirstate._map)
322 322 self._loweredfiles = set(encoding.lower(allfiles).split('\0'))
323 323 self._dirstate = dirstate
324 324 # The purpose of _newfiles is so that we don't complain about
325 325 # case collisions if someone were to call this object with the
326 326 # same filename twice.
327 327 self._newfiles = set()
328 328
329 329 def __call__(self, f):
330 330 if f in self._newfiles:
331 331 return
332 332 fl = encoding.lower(f)
333 333 if fl in self._loweredfiles and f not in self._dirstate:
334 334 msg = _('possible case-folding collision for %s') % f
335 335 if self._abort:
336 336 raise error.Abort(msg)
337 337 self._ui.warn(_("warning: %s\n") % msg)
338 338 self._loweredfiles.add(fl)
339 339 self._newfiles.add(f)
340 340
341 341 def filteredhash(repo, maxrev):
342 342 """build hash of filtered revisions in the current repoview.
343 343
344 344 Multiple caches perform up-to-date validation by checking that the
345 345 tiprev and tipnode stored in the cache file match the current repository.
346 346 However, this is not sufficient for validating repoviews because the set
347 347 of revisions in the view may change without the repository tiprev and
348 348 tipnode changing.
349 349
350 350 This function hashes all the revs filtered from the view and returns
351 351 that SHA-1 digest.
352 352 """
353 353 cl = repo.changelog
354 354 if not cl.filteredrevs:
355 355 return None
356 356 key = None
357 357 revs = sorted(r for r in cl.filteredrevs if r <= maxrev)
358 358 if revs:
359 359 s = hashlib.sha1()
360 360 for rev in revs:
361 361 s.update('%d;' % rev)
362 362 key = s.digest()
363 363 return key
364 364
365 365 def walkrepos(path, followsym=False, seen_dirs=None, recurse=False):
366 366 '''yield every hg repository under path, always recursively.
367 367 The recurse flag will only control recursion into repo working dirs'''
368 368 def errhandler(err):
369 369 if err.filename == path:
370 370 raise err
371 371 samestat = getattr(os.path, 'samestat', None)
372 372 if followsym and samestat is not None:
373 373 def adddir(dirlst, dirname):
374 374 dirstat = os.stat(dirname)
375 375 match = any(samestat(dirstat, lstdirstat) for lstdirstat in dirlst)
376 376 if not match:
377 377 dirlst.append(dirstat)
378 378 return not match
379 379 else:
380 380 followsym = False
381 381
382 382 if (seen_dirs is None) and followsym:
383 383 seen_dirs = []
384 384 adddir(seen_dirs, path)
385 385 for root, dirs, files in os.walk(path, topdown=True, onerror=errhandler):
386 386 dirs.sort()
387 387 if '.hg' in dirs:
388 388 yield root # found a repository
389 389 qroot = os.path.join(root, '.hg', 'patches')
390 390 if os.path.isdir(os.path.join(qroot, '.hg')):
391 391 yield qroot # we have a patch queue repo here
392 392 if recurse:
393 393 # avoid recursing inside the .hg directory
394 394 dirs.remove('.hg')
395 395 else:
396 396 dirs[:] = [] # don't descend further
397 397 elif followsym:
398 398 newdirs = []
399 399 for d in dirs:
400 400 fname = os.path.join(root, d)
401 401 if adddir(seen_dirs, fname):
402 402 if os.path.islink(fname):
403 403 for hgname in walkrepos(fname, True, seen_dirs):
404 404 yield hgname
405 405 else:
406 406 newdirs.append(d)
407 407 dirs[:] = newdirs
408 408
409 409 def binnode(ctx):
410 410 """Return binary node id for a given basectx"""
411 411 node = ctx.node()
412 412 if node is None:
413 413 return wdirid
414 414 return node
415 415
416 416 def intrev(ctx):
417 417 """Return integer for a given basectx that can be used in comparison or
418 418 arithmetic operation"""
419 419 rev = ctx.rev()
420 420 if rev is None:
421 421 return wdirrev
422 422 return rev
423 423
424 424 def formatchangeid(ctx):
425 425 """Format changectx as '{rev}:{node|formatnode}', which is the default
426 426 template provided by logcmdutil.changesettemplater"""
427 427 repo = ctx.repo()
428 428 return formatrevnode(repo.ui, intrev(ctx), binnode(ctx))
429 429
430 430 def formatrevnode(ui, rev, node):
431 431 """Format given revision and node depending on the current verbosity"""
432 432 if ui.debugflag:
433 433 hexfunc = hex
434 434 else:
435 435 hexfunc = short
436 436 return '%d:%s' % (rev, hexfunc(node))
437 437
438 438 def resolvehexnodeidprefix(repo, prefix):
439 439 # Uses unfiltered repo because it's faster when prefix is ambiguous.
440 440 # This matches the shortesthexnodeidprefix() function below.
441 441 node = repo.unfiltered().changelog._partialmatch(prefix)
442 442 if node is None:
443 443 return
444 444 repo.changelog.rev(node) # make sure node isn't filtered
445 445 return node
446 446
447 447 def shortesthexnodeidprefix(repo, node, minlength=1):
448 448 """Find the shortest unambiguous prefix that matches hexnode."""
449 449 # _partialmatch() of filtered changelog could take O(len(repo)) time,
451 451 # which would be unacceptably slow. So we look for hash collisions in
451 451 # unfiltered space, which means some hashes may be slightly longer.
452 cl = repo.unfiltered().changelog
453
454 def isrev(prefix):
455 try:
456 i = int(prefix)
457 # if we are a pure int, then starting with zero will not be
458 # confused as a rev; or, obviously, if the int is larger
459 # than the value of the tip rev
460 if prefix[0] == '0' or i > len(cl):
461 return False
462 return True
463 except ValueError:
464 return False
465
466 def disambiguate(prefix):
467 """Disambiguate against revnums."""
468 hexnode = hex(node)
469 for length in range(len(prefix), 41):
470 prefix = hexnode[:length]
471 if not isrev(prefix):
472 return prefix
473
452 474 try:
453 return repo.unfiltered().changelog.shortest(node, minlength)
475 return disambiguate(cl.shortest(node, minlength))
454 476 except error.LookupError:
455 477 raise error.RepoLookupError()
456 478
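This is where the revision-number disambiguation now lives: a prefix that parses as a plausible revision number (no leading zero, not beyond tip) would be read as a revnum, so the hex prefix has to grow past it. A rough standalone sketch of the check, with an assumed tip value standing in for len(cl):

tipvalue = 100                    # assumed stand-in for len(cl)

def sketch_isrev(prefix):
    try:
        i = int(prefix)
    except ValueError:
        return False              # not a pure integer, cannot be a revnum
    # a leading zero is never printed as a revnum, nor is a value beyond tip
    return prefix[0:1] != '0' and i <= tipvalue

assert sketch_isrev('12')         # could be revision 12, keep growing the prefix
assert not sketch_isrev('012')    # leading zero
assert not sketch_isrev('999')    # beyond the assumed tip
assert not sketch_isrev('d4e')    # not an integer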
457 479 def isrevsymbol(repo, symbol):
458 480 """Checks if a symbol exists in the repo.
459 481
460 482 See revsymbol() for details. Raises error.LookupError if the symbol is an
461 483 ambiguous nodeid prefix.
462 484 """
463 485 try:
464 486 revsymbol(repo, symbol)
465 487 return True
466 488 except error.RepoLookupError:
467 489 return False
468 490
469 491 def revsymbol(repo, symbol):
470 492 """Returns a context given a single revision symbol (as string).
471 493
472 494 This is similar to revsingle(), but accepts only a single revision symbol,
473 495 i.e. things like ".", "tip", "1234", "deadbeef", "my-bookmark" work, but
474 496 not "max(public())".
475 497 """
476 498 if not isinstance(symbol, bytes):
477 499 msg = ("symbol (%s of type %s) was not a string, did you mean "
478 500 "repo[symbol]?" % (symbol, type(symbol)))
479 501 raise error.ProgrammingError(msg)
480 502 try:
481 503 if symbol in ('.', 'tip', 'null'):
482 504 return repo[symbol]
483 505
484 506 try:
485 507 r = int(symbol)
486 508 if '%d' % r != symbol:
487 509 raise ValueError
488 510 l = len(repo.changelog)
489 511 if r < 0:
490 512 r += l
491 513 if r < 0 or r >= l and r != wdirrev:
492 514 raise ValueError
493 515 return repo[r]
494 516 except error.FilteredIndexError:
495 517 raise
496 518 except (ValueError, OverflowError, IndexError):
497 519 pass
498 520
499 521 if len(symbol) == 40:
500 522 try:
501 523 node = bin(symbol)
502 524 rev = repo.changelog.rev(node)
503 525 return repo[rev]
504 526 except error.FilteredLookupError:
505 527 raise
506 528 except (TypeError, LookupError):
507 529 pass
508 530
509 531 # look up bookmarks through the name interface
510 532 try:
511 533 node = repo.names.singlenode(repo, symbol)
512 534 rev = repo.changelog.rev(node)
513 535 return repo[rev]
514 536 except KeyError:
515 537 pass
516 538
517 539 node = resolvehexnodeidprefix(repo, symbol)
518 540 if node is not None:
519 541 rev = repo.changelog.rev(node)
520 542 return repo[rev]
521 543
522 544 raise error.RepoLookupError(_("unknown revision '%s'") % symbol)
523 545
524 546 except error.WdirUnsupported:
525 547 return repo[None]
526 548 except (error.FilteredIndexError, error.FilteredLookupError,
527 549 error.FilteredRepoLookupError):
528 550 raise _filterederror(repo, symbol)
529 551
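# Editorial usage sketch, not part of this change (assumes an existing
# localrepository object `repo`): revsymbol() resolves exactly one symbol,
# trying '.'/'tip'/'null', revision numbers, full hashes, the names
# interface (bookmarks, tags, ...) and finally unique hash prefixes; it
# never evaluates revset expressions.
from mercurial import scmutil
ctx = scmutil.revsymbol(repo, b'tip')
if scmutil.isrevsymbol(repo, b'deadbeef'):       # probe without raising
    ctx = scmutil.revsymbol(repo, b'deadbeef')
# scmutil.revsymbol(repo, b'max(public())')      # not supported; use revrange()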
530 552 def _filterederror(repo, changeid):
531 553 """build an exception to be raised about a filtered changeid
532 554
533 555 This is extracted into a function to help extensions (e.g. evolve)
534 556 experiment with various message variants."""
535 557 if repo.filtername.startswith('visible'):
536 558
537 559 # Check if the changeset is obsolete
538 560 unfilteredrepo = repo.unfiltered()
539 561 ctx = revsymbol(unfilteredrepo, changeid)
540 562
541 563 # If the changeset is obsolete, enrich the message with the reason
542 564 # that made this changeset not visible
543 565 if ctx.obsolete():
544 566 msg = obsutil._getfilteredreason(repo, changeid, ctx)
545 567 else:
546 568 msg = _("hidden revision '%s'") % changeid
547 569
548 570 hint = _('use --hidden to access hidden revisions')
549 571
550 572 return error.FilteredRepoLookupError(msg, hint=hint)
551 573 msg = _("filtered revision '%s' (not in '%s' subset)")
552 574 msg %= (changeid, repo.filtername)
553 575 return error.FilteredRepoLookupError(msg)
554 576
555 577 def revsingle(repo, revspec, default='.', localalias=None):
556 578 if not revspec and revspec != 0:
557 579 return repo[default]
558 580
559 581 l = revrange(repo, [revspec], localalias=localalias)
560 582 if not l:
561 583 raise error.Abort(_('empty revision set'))
562 584 return repo[l.last()]
563 585
564 586 def _pairspec(revspec):
565 587 tree = revsetlang.parse(revspec)
566 588 return tree and tree[0] in ('range', 'rangepre', 'rangepost', 'rangeall')
567 589
568 590 def revpair(repo, revs):
569 591 if not revs:
570 592 return repo['.'], repo[None]
571 593
572 594 l = revrange(repo, revs)
573 595
574 596 if not l:
575 597 first = second = None
576 598 elif l.isascending():
577 599 first = l.min()
578 600 second = l.max()
579 601 elif l.isdescending():
580 602 first = l.max()
581 603 second = l.min()
582 604 else:
583 605 first = l.first()
584 606 second = l.last()
585 607
586 608 if first is None:
587 609 raise error.Abort(_('empty revision range'))
588 610 if (first == second and len(revs) >= 2
589 611 and not all(revrange(repo, [r]) for r in revs)):
590 612 raise error.Abort(_('empty revision on one side of range'))
591 613
592 614 # if top-level is range expression, the result must always be a pair
593 615 if first == second and len(revs) == 1 and not _pairspec(revs[0]):
594 616 return repo[first], repo[None]
595 617
596 618 return repo[first], repo[second]
597 619
598 620 def revrange(repo, specs, localalias=None):
599 621 """Execute 1 to many revsets and return the union.
600 622
601 623 This is the preferred mechanism for executing revsets using user-specified
602 624 config options, such as revset aliases.
603 625
604 626 The revsets specified by ``specs`` will be executed via a chained ``OR``
605 627 expression. If ``specs`` is empty, an empty result is returned.
606 628
607 629 ``specs`` can contain integers, in which case they are assumed to be
608 630 revision numbers.
609 631
610 632 It is assumed the revsets are already formatted. If you have arguments
611 633 that need to be expanded in the revset, call ``revsetlang.formatspec()``
612 634 and pass the result as an element of ``specs``.
613 635
614 636 Specifying a single revset is allowed.
615 637
616 638 Returns a ``revset.abstractsmartset`` which is a list-like interface over
617 639 integer revisions.
618 640 """
619 641 allspecs = []
620 642 for spec in specs:
621 643 if isinstance(spec, int):
622 644 spec = revsetlang.formatspec('rev(%d)', spec)
623 645 allspecs.append(spec)
624 646 return repo.anyrevs(allspecs, user=True, localalias=localalias)
625 647
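# Editorial usage sketch, not part of this change (assumes an existing
# `repo`): user-supplied values should be expanded with
# revsetlang.formatspec() before being handed to revrange(), as the
# docstring above recommends; the specs are then OR-ed together.
from mercurial import revsetlang, scmutil
spec = revsetlang.formatspec(b'branch(%s)', b'default')
revs = scmutil.revrange(repo, [spec, b'tip'])    # union of both revsets
tiprev = revs.max()                              # smartsets expose min()/max()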
626 648 def meaningfulparents(repo, ctx):
627 649 """Return list of meaningful (or all if debug) parentrevs for rev.
628 650
629 651 For merges (two non-nullrev revisions) both parents are meaningful.
630 652 Otherwise the first parent revision is considered meaningful if it
631 653 is not the preceding revision.
632 654 """
633 655 parents = ctx.parents()
634 656 if len(parents) > 1:
635 657 return parents
636 658 if repo.ui.debugflag:
637 659 return [parents[0], repo['null']]
638 660 if parents[0].rev() >= intrev(ctx) - 1:
639 661 return []
640 662 return parents
641 663
642 664 def expandpats(pats):
643 665 '''Expand bare globs when running on windows.
644 666 On posix we assume it has already been done by sh.'''
645 667 if not util.expandglobs:
646 668 return list(pats)
647 669 ret = []
648 670 for kindpat in pats:
649 671 kind, pat = matchmod._patsplit(kindpat, None)
650 672 if kind is None:
651 673 try:
652 674 globbed = glob.glob(pat)
653 675 except re.error:
654 676 globbed = [pat]
655 677 if globbed:
656 678 ret.extend(globbed)
657 679 continue
658 680 ret.append(kindpat)
659 681 return ret
660 682
661 683 def matchandpats(ctx, pats=(), opts=None, globbed=False, default='relpath',
662 684 badfn=None):
663 685 '''Return a matcher and the patterns that were used.
664 686 The matcher will warn about bad matches, unless an alternate badfn callback
665 687 is provided.'''
666 688 if pats == ("",):
667 689 pats = []
668 690 if opts is None:
669 691 opts = {}
670 692 if not globbed and default == 'relpath':
671 693 pats = expandpats(pats or [])
672 694
673 695 def bad(f, msg):
674 696 ctx.repo().ui.warn("%s: %s\n" % (m.rel(f), msg))
675 697
676 698 if badfn is None:
677 699 badfn = bad
678 700
679 701 m = ctx.match(pats, opts.get('include'), opts.get('exclude'),
680 702 default, listsubrepos=opts.get('subrepos'), badfn=badfn)
681 703
682 704 if m.always():
683 705 pats = []
684 706 return m, pats
685 707
686 708 def match(ctx, pats=(), opts=None, globbed=False, default='relpath',
687 709 badfn=None):
688 710 '''Return a matcher that will warn about bad matches.'''
689 711 return matchandpats(ctx, pats, opts, globbed, default, badfn=badfn)[0]
690 712
691 713 def matchall(repo):
692 714 '''Return a matcher that will efficiently match everything.'''
693 715 return matchmod.always(repo.root, repo.getcwd())
694 716
695 717 def matchfiles(repo, files, badfn=None):
696 718 '''Return a matcher that will efficiently match exactly these files.'''
697 719 return matchmod.exact(repo.root, repo.getcwd(), files, badfn=badfn)
698 720
699 721 def parsefollowlinespattern(repo, rev, pat, msg):
700 722 """Return a file name from `pat` pattern suitable for usage in followlines
701 723 logic.
702 724 """
703 725 if not matchmod.patkind(pat):
704 726 return pathutil.canonpath(repo.root, repo.getcwd(), pat)
705 727 else:
706 728 ctx = repo[rev]
707 729 m = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=ctx)
708 730 files = [f for f in ctx if m(f)]
709 731 if len(files) != 1:
710 732 raise error.ParseError(msg)
711 733 return files[0]
712 734
713 735 def origpath(ui, repo, filepath):
714 736 '''customize where .orig files are created
715 737
716 738 Fetch user defined path from config file: [ui] origbackuppath = <path>
717 739 Fall back to default (filepath with .orig suffix) if not specified
718 740 '''
719 741 origbackuppath = ui.config('ui', 'origbackuppath')
720 742 if not origbackuppath:
721 743 return filepath + ".orig"
722 744
723 745 # Convert filepath from an absolute path into a path inside the repo.
724 746 filepathfromroot = util.normpath(os.path.relpath(filepath,
725 747 start=repo.root))
726 748
727 749 origvfs = vfs.vfs(repo.wjoin(origbackuppath))
728 750 origbackupdir = origvfs.dirname(filepathfromroot)
729 751 if not origvfs.isdir(origbackupdir) or origvfs.islink(origbackupdir):
730 752 ui.note(_('creating directory: %s\n') % origvfs.join(origbackupdir))
731 753
732 754 # Remove any files that conflict with the backup file's path
733 755 for f in reversed(list(util.finddirs(filepathfromroot))):
734 756 if origvfs.isfileorlink(f):
735 757 ui.note(_('removing conflicting file: %s\n')
736 758 % origvfs.join(f))
737 759 origvfs.unlink(f)
738 760 break
739 761
740 762 origvfs.makedirs(origbackupdir)
741 763
742 764 if origvfs.isdir(filepathfromroot) and not origvfs.islink(filepathfromroot):
743 765 ui.note(_('removing conflicting directory: %s\n')
744 766 % origvfs.join(filepathfromroot))
745 767 origvfs.rmtree(filepathfromroot, forcibly=True)
746 768
747 769 return origvfs.join(filepathfromroot)
748 770
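# Editorial sketch, not part of this change (hypothetical paths, assumes an
# existing `repo` rooted at /repo and a ui carrying the config below):
#
#   [ui]
#   origbackuppath = .hg/origbackups
#
# backups are then redirected into that directory instead of being written
# next to the original file:
backup = origpath(ui, repo, b'/repo/dir/file.txt')
# backup == b'/repo/.hg/origbackups/dir/file.txt' rather than
# b'/repo/dir/file.txt.orig'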
749 771 class _containsnode(object):
750 772 """proxy __contains__(node) to container.__contains__ which accepts revs"""
751 773
752 774 def __init__(self, repo, revcontainer):
753 775 self._torev = repo.changelog.rev
754 776 self._revcontains = revcontainer.__contains__
755 777
756 778 def __contains__(self, node):
757 779 return self._revcontains(self._torev(node))
758 780
759 781 def cleanupnodes(repo, replacements, operation, moves=None, metadata=None):
760 782 """do common cleanups when old nodes are replaced by new nodes
761 783
762 784 That includes writing obsmarkers or stripping nodes, and moving bookmarks.
763 785 (we might also want to move working directory parent in the future)
764 786
765 787 By default, bookmark moves are calculated automatically from 'replacements',
766 788 but 'moves' can be used to override that. Also, 'moves' may include
767 789 additional bookmark moves that should not have associated obsmarkers.
768 790
769 791 replacements is {oldnode: [newnode]} or an iterable of nodes if they do not
770 792 have replacements. operation is a string, like "rebase".
771 793
772 794 metadata is a dictionary containing metadata to be stored in obsmarkers if
773 795 obsolescence is enabled.
774 796 """
775 797 if not replacements and not moves:
776 798 return
777 799
778 800 # translate mapping's other forms
779 801 if not util.safehasattr(replacements, 'items'):
780 802 replacements = {n: () for n in replacements}
781 803
782 804 # Calculate bookmark movements
783 805 if moves is None:
784 806 moves = {}
785 807 # Unfiltered repo is needed since nodes in replacements might be hidden.
786 808 unfi = repo.unfiltered()
787 809 for oldnode, newnodes in replacements.items():
788 810 if oldnode in moves:
789 811 continue
790 812 if len(newnodes) > 1:
791 813 # usually a split, take the one with biggest rev number
792 814 newnode = next(unfi.set('max(%ln)', newnodes)).node()
793 815 elif len(newnodes) == 0:
794 816 # move bookmark backwards
795 817 roots = list(unfi.set('max((::%n) - %ln)', oldnode,
796 818 list(replacements)))
797 819 if roots:
798 820 newnode = roots[0].node()
799 821 else:
800 822 newnode = nullid
801 823 else:
802 824 newnode = newnodes[0]
803 825 moves[oldnode] = newnode
804 826
805 827 with repo.transaction('cleanup') as tr:
806 828 # Move bookmarks
807 829 bmarks = repo._bookmarks
808 830 bmarkchanges = []
809 831 allnewnodes = [n for ns in replacements.values() for n in ns]
810 832 for oldnode, newnode in moves.items():
811 833 oldbmarks = repo.nodebookmarks(oldnode)
812 834 if not oldbmarks:
813 835 continue
814 836 from . import bookmarks # avoid import cycle
815 837 repo.ui.debug('moving bookmarks %r from %s to %s\n' %
816 838 (util.rapply(pycompat.maybebytestr, oldbmarks),
817 839 hex(oldnode), hex(newnode)))
818 840 # Delete divergent bookmarks being parents of related newnodes
819 841 deleterevs = repo.revs('parents(roots(%ln & (::%n))) - parents(%n)',
820 842 allnewnodes, newnode, oldnode)
821 843 deletenodes = _containsnode(repo, deleterevs)
822 844 for name in oldbmarks:
823 845 bmarkchanges.append((name, newnode))
824 846 for b in bookmarks.divergent2delete(repo, deletenodes, name):
825 847 bmarkchanges.append((b, None))
826 848
827 849 if bmarkchanges:
828 850 bmarks.applychanges(repo, tr, bmarkchanges)
829 851
830 852 # Obsolete or strip nodes
831 853 if obsolete.isenabled(repo, obsolete.createmarkersopt):
832 854 # If a node is already obsoleted, and we want to obsolete it
833 855 # without a successor, skip that obssolete request since it's
834 856 # unnecessary. That's the "if s or not isobs(n)" check below.
835 857 # Also sort the node in topology order, that might be useful for
836 858 # some obsstore logic.
837 859 # NOTE: the filtering and sorting might belong to createmarkers.
838 860 isobs = unfi.obsstore.successors.__contains__
839 861 torev = unfi.changelog.rev
840 862 sortfunc = lambda ns: torev(ns[0])
841 863 rels = [(unfi[n], tuple(unfi[m] for m in s))
842 864 for n, s in sorted(replacements.items(), key=sortfunc)
843 865 if s or not isobs(n)]
844 866 if rels:
845 867 obsolete.createmarkers(repo, rels, operation=operation,
846 868 metadata=metadata)
847 869 else:
848 870 from . import repair # avoid import cycle
849 871 tostrip = list(replacements)
850 872 if tostrip:
851 873 repair.delayedstrip(repo.ui, repo, tostrip, operation)
852 874
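# Editorial sketch, not part of this change (hypothetical nodes, assumes an
# existing `repo`): `replacements` maps each rewritten node to its
# successors; an empty successor list means the node was pruned, and its
# bookmarks then move back to the nearest surviving ancestor.
replacements = {
    oldnode: [newnode],      # hypothetical binary node ids
    prunednode: [],          # no successor: obsolete/strip without one
}
cleanupnodes(repo, replacements, operation=b'rebase')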
853 875 def addremove(repo, matcher, prefix, opts=None):
854 876 if opts is None:
855 877 opts = {}
856 878 m = matcher
857 879 dry_run = opts.get('dry_run')
858 880 try:
859 881 similarity = float(opts.get('similarity') or 0)
860 882 except ValueError:
861 883 raise error.Abort(_('similarity must be a number'))
862 884 if similarity < 0 or similarity > 100:
863 885 raise error.Abort(_('similarity must be between 0 and 100'))
864 886 similarity /= 100.0
865 887
866 888 ret = 0
867 889 join = lambda f: os.path.join(prefix, f)
868 890
869 891 wctx = repo[None]
870 892 for subpath in sorted(wctx.substate):
871 893 submatch = matchmod.subdirmatcher(subpath, m)
872 894 if opts.get('subrepos') or m.exact(subpath) or any(submatch.files()):
873 895 sub = wctx.sub(subpath)
874 896 try:
875 897 if sub.addremove(submatch, prefix, opts):
876 898 ret = 1
877 899 except error.LookupError:
878 900 repo.ui.status(_("skipping missing subrepository: %s\n")
879 901 % join(subpath))
880 902
881 903 rejected = []
882 904 def badfn(f, msg):
883 905 if f in m.files():
884 906 m.bad(f, msg)
885 907 rejected.append(f)
886 908
887 909 badmatch = matchmod.badmatch(m, badfn)
888 910 added, unknown, deleted, removed, forgotten = _interestingfiles(repo,
889 911 badmatch)
890 912
891 913 unknownset = set(unknown + forgotten)
892 914 toprint = unknownset.copy()
893 915 toprint.update(deleted)
894 916 for abs in sorted(toprint):
895 917 if repo.ui.verbose or not m.exact(abs):
896 918 if abs in unknownset:
897 919 status = _('adding %s\n') % m.uipath(abs)
898 920 else:
899 921 status = _('removing %s\n') % m.uipath(abs)
900 922 repo.ui.status(status)
901 923
902 924 renames = _findrenames(repo, m, added + unknown, removed + deleted,
903 925 similarity)
904 926
905 927 if not dry_run:
906 928 _markchanges(repo, unknown + forgotten, deleted, renames)
907 929
908 930 for f in rejected:
909 931 if f in m.files():
910 932 return 1
911 933 return ret
912 934
913 935 def marktouched(repo, files, similarity=0.0):
914 936 '''Assert that files have somehow been operated upon. Files are relative to
915 937 the repo root.'''
916 938 m = matchfiles(repo, files, badfn=lambda x, y: rejected.append(x))
917 939 rejected = []
918 940
919 941 added, unknown, deleted, removed, forgotten = _interestingfiles(repo, m)
920 942
921 943 if repo.ui.verbose:
922 944 unknownset = set(unknown + forgotten)
923 945 toprint = unknownset.copy()
924 946 toprint.update(deleted)
925 947 for abs in sorted(toprint):
926 948 if abs in unknownset:
927 949 status = _('adding %s\n') % abs
928 950 else:
929 951 status = _('removing %s\n') % abs
930 952 repo.ui.status(status)
931 953
932 954 renames = _findrenames(repo, m, added + unknown, removed + deleted,
933 955 similarity)
934 956
935 957 _markchanges(repo, unknown + forgotten, deleted, renames)
936 958
937 959 for f in rejected:
938 960 if f in m.files():
939 961 return 1
940 962 return 0
941 963
942 964 def _interestingfiles(repo, matcher):
943 965 '''Walk dirstate with matcher, looking for files that addremove would care
944 966 about.
945 967
946 968 This is different from dirstate.status because it doesn't care about
947 969 whether files are modified or clean.'''
948 970 added, unknown, deleted, removed, forgotten = [], [], [], [], []
949 971 audit_path = pathutil.pathauditor(repo.root, cached=True)
950 972
951 973 ctx = repo[None]
952 974 dirstate = repo.dirstate
953 975 walkresults = dirstate.walk(matcher, subrepos=sorted(ctx.substate),
954 976 unknown=True, ignored=False, full=False)
955 977 for abs, st in walkresults.iteritems():
956 978 dstate = dirstate[abs]
957 979 if dstate == '?' and audit_path.check(abs):
958 980 unknown.append(abs)
959 981 elif dstate != 'r' and not st:
960 982 deleted.append(abs)
961 983 elif dstate == 'r' and st:
962 984 forgotten.append(abs)
963 985 # for finding renames
964 986 elif dstate == 'r' and not st:
965 987 removed.append(abs)
966 988 elif dstate == 'a':
967 989 added.append(abs)
968 990
969 991 return added, unknown, deleted, removed, forgotten
970 992
971 993 def _findrenames(repo, matcher, added, removed, similarity):
972 994 '''Find renames from removed files to added ones.'''
973 995 renames = {}
974 996 if similarity > 0:
975 997 for old, new, score in similar.findrenames(repo, added, removed,
976 998 similarity):
977 999 if (repo.ui.verbose or not matcher.exact(old)
978 1000 or not matcher.exact(new)):
979 1001 repo.ui.status(_('recording removal of %s as rename to %s '
980 1002 '(%d%% similar)\n') %
981 1003 (matcher.rel(old), matcher.rel(new),
982 1004 score * 100))
983 1005 renames[new] = old
984 1006 return renames
985 1007
986 1008 def _markchanges(repo, unknown, deleted, renames):
987 1009 '''Marks the files in unknown as added, the files in deleted as removed,
988 1010 and the files in renames as copied.'''
989 1011 wctx = repo[None]
990 1012 with repo.wlock():
991 1013 wctx.forget(deleted)
992 1014 wctx.add(unknown)
993 1015 for new, old in renames.iteritems():
994 1016 wctx.copy(old, new)
995 1017
996 1018 def dirstatecopy(ui, repo, wctx, src, dst, dryrun=False, cwd=None):
997 1019 """Update the dirstate to reflect the intent of copying src to dst. For
998 1020 different reasons it might not end with dst being marked as copied from src.
999 1021 """
1000 1022 origsrc = repo.dirstate.copied(src) or src
1001 1023 if dst == origsrc: # copying back a copy?
1002 1024 if repo.dirstate[dst] not in 'mn' and not dryrun:
1003 1025 repo.dirstate.normallookup(dst)
1004 1026 else:
1005 1027 if repo.dirstate[origsrc] == 'a' and origsrc == src:
1006 1028 if not ui.quiet:
1007 1029 ui.warn(_("%s has not been committed yet, so no copy "
1008 1030 "data will be stored for %s.\n")
1009 1031 % (repo.pathto(origsrc, cwd), repo.pathto(dst, cwd)))
1010 1032 if repo.dirstate[dst] in '?r' and not dryrun:
1011 1033 wctx.add([dst])
1012 1034 elif not dryrun:
1013 1035 wctx.copy(origsrc, dst)
1014 1036
1015 1037 def readrequires(opener, supported):
1016 1038 '''Reads and parses .hg/requires and checks if all entries found
1017 1039 are in the list of supported features.'''
1018 1040 requirements = set(opener.read("requires").splitlines())
1019 1041 missings = []
1020 1042 for r in requirements:
1021 1043 if r not in supported:
1022 1044 if not r or not r[0:1].isalnum():
1023 1045 raise error.RequirementError(_(".hg/requires file is corrupt"))
1024 1046 missings.append(r)
1025 1047 missings.sort()
1026 1048 if missings:
1027 1049 raise error.RequirementError(
1028 1050 _("repository requires features unknown to this Mercurial: %s")
1029 1051 % " ".join(missings),
1030 1052 hint=_("see https://mercurial-scm.org/wiki/MissingRequirement"
1031 1053 " for more information"))
1032 1054 return requirements
1033 1055
1034 1056 def writerequires(opener, requirements):
1035 1057 with opener('requires', 'w') as fp:
1036 1058 for r in sorted(requirements):
1037 1059 fp.write("%s\n" % r)
1038 1060
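# Editorial sketch of the on-disk format handled by the two helpers above,
# not part of this change: .hg/requires is a plain list of feature names,
# one per line and written sorted, for example
#
#   dotencode
#   fncache
#   generaldelta
#   revlogv1
#   store
#
# readrequires() raises RequirementError when any listed feature is not in
# the caller's `supported` set.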
1039 1061 class filecachesubentry(object):
1040 1062 def __init__(self, path, stat):
1041 1063 self.path = path
1042 1064 self.cachestat = None
1043 1065 self._cacheable = None
1044 1066
1045 1067 if stat:
1046 1068 self.cachestat = filecachesubentry.stat(self.path)
1047 1069
1048 1070 if self.cachestat:
1049 1071 self._cacheable = self.cachestat.cacheable()
1050 1072 else:
1051 1073 # None means we don't know yet
1052 1074 self._cacheable = None
1053 1075
1054 1076 def refresh(self):
1055 1077 if self.cacheable():
1056 1078 self.cachestat = filecachesubentry.stat(self.path)
1057 1079
1058 1080 def cacheable(self):
1059 1081 if self._cacheable is not None:
1060 1082 return self._cacheable
1061 1083
1062 1084 # we don't know yet, assume it is for now
1063 1085 return True
1064 1086
1065 1087 def changed(self):
1066 1088 # no point in going further if we can't cache it
1067 1089 if not self.cacheable():
1068 1090 return True
1069 1091
1070 1092 newstat = filecachesubentry.stat(self.path)
1071 1093
1072 1094 # we may not know if it's cacheable yet, check again now
1073 1095 if newstat and self._cacheable is None:
1074 1096 self._cacheable = newstat.cacheable()
1075 1097
1076 1098 # check again
1077 1099 if not self._cacheable:
1078 1100 return True
1079 1101
1080 1102 if self.cachestat != newstat:
1081 1103 self.cachestat = newstat
1082 1104 return True
1083 1105 else:
1084 1106 return False
1085 1107
1086 1108 @staticmethod
1087 1109 def stat(path):
1088 1110 try:
1089 1111 return util.cachestat(path)
1090 1112 except OSError as e:
1091 1113 if e.errno != errno.ENOENT:
1092 1114 raise
1093 1115
1094 1116 class filecacheentry(object):
1095 1117 def __init__(self, paths, stat=True):
1096 1118 self._entries = []
1097 1119 for path in paths:
1098 1120 self._entries.append(filecachesubentry(path, stat))
1099 1121
1100 1122 def changed(self):
1101 1123 '''true if any entry has changed'''
1102 1124 for entry in self._entries:
1103 1125 if entry.changed():
1104 1126 return True
1105 1127 return False
1106 1128
1107 1129 def refresh(self):
1108 1130 for entry in self._entries:
1109 1131 entry.refresh()
1110 1132
1111 1133 class filecache(object):
1112 1134 '''A property-like decorator that tracks files under .hg/ for updates.
1113 1135
1114 1136 Records stat info when called in _filecache.
1115 1137
1116 1138 On subsequent calls, compares old stat info with new info, and recreates the
1117 1139 object when any of the files changes, updating the new stat info in
1118 1140 _filecache.
1119 1141
1120 1142 Mercurial either atomically renames or appends to files under .hg,
1121 1143 so to ensure the cache is reliable we need the filesystem to be able
1122 1144 to tell us if a file has been replaced. If it can't, we fall back to
1123 1145 recreating the object on every call (essentially the same behavior as
1124 1146 propertycache).
1125 1147
1126 1148 '''
1127 1149 def __init__(self, *paths):
1128 1150 self.paths = paths
1129 1151
1130 1152 def join(self, obj, fname):
1131 1153 """Used to compute the runtime path of a cached file.
1132 1154
1133 1155 Users should subclass filecache and provide their own version of this
1134 1156 function to call the appropriate join function on 'obj' (an instance
1135 1157 of the class whose member function was decorated).
1136 1158 """
1137 1159 raise NotImplementedError
1138 1160
1139 1161 def __call__(self, func):
1140 1162 self.func = func
1141 1163 self.sname = func.__name__
1142 1164 self.name = pycompat.sysbytes(self.sname)
1143 1165 return self
1144 1166
1145 1167 def __get__(self, obj, type=None):
1146 1168 # if accessed on the class, return the descriptor itself.
1147 1169 if obj is None:
1148 1170 return self
1149 1171 # do we need to check if the file changed?
1150 1172 if self.sname in obj.__dict__:
1151 1173 assert self.name in obj._filecache, self.name
1152 1174 return obj.__dict__[self.sname]
1153 1175
1154 1176 entry = obj._filecache.get(self.name)
1155 1177
1156 1178 if entry:
1157 1179 if entry.changed():
1158 1180 entry.obj = self.func(obj)
1159 1181 else:
1160 1182 paths = [self.join(obj, path) for path in self.paths]
1161 1183
1162 1184 # We stat -before- creating the object so our cache doesn't lie if
1163 1185 # a writer modified between the time we read and stat
1164 1186 entry = filecacheentry(paths, True)
1165 1187 entry.obj = self.func(obj)
1166 1188
1167 1189 obj._filecache[self.name] = entry
1168 1190
1169 1191 obj.__dict__[self.sname] = entry.obj
1170 1192 return entry.obj
1171 1193
1172 1194 def __set__(self, obj, value):
1173 1195 if self.name not in obj._filecache:
1174 1196 # we add an entry for the missing value because X in __dict__
1175 1197 # implies X in _filecache
1176 1198 paths = [self.join(obj, path) for path in self.paths]
1177 1199 ce = filecacheentry(paths, False)
1178 1200 obj._filecache[self.name] = ce
1179 1201 else:
1180 1202 ce = obj._filecache[self.name]
1181 1203
1182 1204 ce.obj = value # update cached copy
1183 1205 obj.__dict__[self.sname] = value # update copy returned by obj.x
1184 1206
1185 1207 def __delete__(self, obj):
1186 1208 try:
1187 1209 del obj.__dict__[self.sname]
1188 1210 except KeyError:
1189 1211 raise AttributeError(self.sname)
1190 1212
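# Editorial sketch, not part of this change: concrete users subclass
# filecache and implement join() so tracked paths resolve against the right
# vfs, mirroring how localrepo defines its repofilecache. Roughly:
class repofilecache(filecache):
    """filecache for files inside .hg/ (reduced illustration)"""
    def join(self, obj, fname):
        return obj.vfs.join(fname)

class somerepo(object):                  # heavily reduced, hypothetical
    def __init__(self, vfs):
        self.vfs = vfs
        self._filecache = {}             # required by the descriptor above

    @repofilecache(b'bookmarks')
    def _bookmarks(self):
        # re-read .hg/bookmarks only when its stat info changes
        return self.vfs.tryread(b'bookmarks')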
1191 1213 def extdatasource(repo, source):
1192 1214 """Gather a map of rev -> value dict from the specified source
1193 1215
1194 1216 A source spec is treated as a URL, with a special case shell: type
1195 1217 for parsing the output from a shell command.
1196 1218
1197 1219 The data is parsed as a series of newline-separated records where
1198 1220 each record is a revision specifier optionally followed by a space
1199 1221 and a freeform string value. If the revision is known locally, it
1200 1222 is converted to a rev, otherwise the record is skipped.
1201 1223
1202 1224 Note that both key and value are treated as UTF-8 and converted to
1203 1225 the local encoding. This allows uniformity between local and
1204 1226 remote data sources.
1205 1227 """
1206 1228
1207 1229 spec = repo.ui.config("extdata", source)
1208 1230 if not spec:
1209 1231 raise error.Abort(_("unknown extdata source '%s'") % source)
1210 1232
1211 1233 data = {}
1212 1234 src = proc = None
1213 1235 try:
1214 1236 if spec.startswith("shell:"):
1215 1237 # external commands should be run relative to the repo root
1216 1238 cmd = spec[6:]
1217 1239 proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
1218 1240 close_fds=procutil.closefds,
1219 1241 stdout=subprocess.PIPE, cwd=repo.root)
1220 1242 src = proc.stdout
1221 1243 else:
1222 1244 # treat as a URL or file
1223 1245 src = url.open(repo.ui, spec)
1224 1246 for l in src:
1225 1247 if " " in l:
1226 1248 k, v = l.strip().split(" ", 1)
1227 1249 else:
1228 1250 k, v = l.strip(), ""
1229 1251
1230 1252 k = encoding.tolocal(k)
1231 1253 try:
1232 1254 data[revsingle(repo, k).rev()] = encoding.tolocal(v)
1233 1255 except (error.LookupError, error.RepoLookupError):
1234 1256 pass # we ignore data for nodes that don't exist locally
1235 1257 finally:
1236 1258 if proc:
1237 1259 proc.communicate()
1238 1260 if src:
1239 1261 src.close()
1240 1262 if proc and proc.returncode != 0:
1241 1263 raise error.Abort(_("extdata command '%s' failed: %s")
1242 1264 % (cmd, procutil.explainexit(proc.returncode)))
1243 1265
1244 1266 return data
1245 1267
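# Editorial sketch, not part of this change (hypothetical config and data):
# an [extdata] source named "bugzilla" could be declared as
#
#   [extdata]
#   bugzilla = shell:cat .hg/bugzilla-ids
#
# where the command prints newline-separated "<revspec> <value>" records,
# e.g.
#
#   3ac950cd0c5e 1234
#   deadbeefcafe 5678
#
# extdatasource(repo, b'bugzilla') then maps the local revs of those nodes
# to '1234'/'5678', silently skipping records whose revision is unknown.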
1246 1268 def _locksub(repo, lock, envvar, cmd, environ=None, *args, **kwargs):
1247 1269 if lock is None:
1248 1270 raise error.LockInheritanceContractViolation(
1249 1271 'lock can only be inherited while held')
1250 1272 if environ is None:
1251 1273 environ = {}
1252 1274 with lock.inherit() as locker:
1253 1275 environ[envvar] = locker
1254 1276 return repo.ui.system(cmd, environ=environ, *args, **kwargs)
1255 1277
1256 1278 def wlocksub(repo, cmd, *args, **kwargs):
1257 1279 """run cmd as a subprocess that allows inheriting repo's wlock
1258 1280
1259 1281 This can only be called while the wlock is held. This takes all the
1260 1282 arguments that ui.system does, and returns the exit code of the
1261 1283 subprocess."""
1262 1284 return _locksub(repo, repo.currentwlock(), 'HG_WLOCK_LOCKER', cmd, *args,
1263 1285 **kwargs)
1264 1286
1265 1287 def gdinitconfig(ui):
1266 1288 """helper function to know if a repo should be created as general delta
1267 1289 """
1268 1290 # experimental config: format.generaldelta
1269 1291 return (ui.configbool('format', 'generaldelta')
1270 1292 or ui.configbool('format', 'usegeneraldelta'))
1271 1293
1272 1294 def gddeltaconfig(ui):
1273 1295 """helper function to know if incoming delta should be optimised
1274 1296 """
1275 1297 # experimental config: format.generaldelta
1276 1298 return ui.configbool('format', 'generaldelta')
1277 1299
1278 1300 class simplekeyvaluefile(object):
1279 1301 """A simple file with key=value lines
1280 1302
1281 1303 Keys must be alphanumeric and start with a letter; values must not
1282 1304 contain '\n' characters"""
1283 1305 firstlinekey = '__firstline'
1284 1306
1285 1307 def __init__(self, vfs, path, keys=None):
1286 1308 self.vfs = vfs
1287 1309 self.path = path
1288 1310
1289 1311 def read(self, firstlinenonkeyval=False):
1290 1312 """Read the contents of a simple key-value file
1291 1313
1292 1314 'firstlinenonkeyval' indicates whether the first line of the file should
1293 1315 be treated as a key-value pair or returned fully under the
1294 1316 __firstline key."""
1295 1317 lines = self.vfs.readlines(self.path)
1296 1318 d = {}
1297 1319 if firstlinenonkeyval:
1298 1320 if not lines:
1299 1321 e = _("empty simplekeyvalue file")
1300 1322 raise error.CorruptedState(e)
1301 1323 # we don't want to include '\n' in the __firstline
1302 1324 d[self.firstlinekey] = lines[0][:-1]
1303 1325 del lines[0]
1304 1326
1305 1327 try:
1306 1328 # the 'if line.strip()' part prevents us from failing on empty
1307 1329 # lines which only contain '\n' therefore are not skipped
1308 1330 # by 'if line'
1309 1331 updatedict = dict(line[:-1].split('=', 1) for line in lines
1310 1332 if line.strip())
1311 1333 if self.firstlinekey in updatedict:
1312 1334 e = _("%r can't be used as a key")
1313 1335 raise error.CorruptedState(e % self.firstlinekey)
1314 1336 d.update(updatedict)
1315 1337 except ValueError as e:
1316 1338 raise error.CorruptedState(str(e))
1317 1339 return d
1318 1340
1319 1341 def write(self, data, firstline=None):
1320 1342 """Write key=>value mapping to a file
1321 1343 data is a dict. Keys must be alphanumeric and start with a letter.
1322 1344 Values must not contain newline characters.
1323 1345
1324 1346 If 'firstline' is not None, it is written to file before
1325 1347 everything else, as it is, not in a key=value form"""
1326 1348 lines = []
1327 1349 if firstline is not None:
1328 1350 lines.append('%s\n' % firstline)
1329 1351
1330 1352 for k, v in data.items():
1331 1353 if k == self.firstlinekey:
1332 1354 e = "key name '%s' is reserved" % self.firstlinekey
1333 1355 raise error.ProgrammingError(e)
1334 1356 if not k[0:1].isalpha():
1335 1357 e = "keys must start with a letter in a key-value file"
1336 1358 raise error.ProgrammingError(e)
1337 1359 if not k.isalnum():
1338 1360 e = "invalid key name in a simple key-value file"
1339 1361 raise error.ProgrammingError(e)
1340 1362 if '\n' in v:
1341 1363 e = "invalid value in a simple key-value file"
1342 1364 raise error.ProgrammingError(e)
1343 1365 lines.append("%s=%s\n" % (k, v))
1344 1366 with self.vfs(self.path, mode='wb', atomictemp=True) as fp:
1345 1367 fp.write(''.join(lines))
1346 1368
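# Editorial usage sketch, not part of this change (hypothetical file name
# and keys, assumes an existing `repo`): round-trip through the key=value
# format implemented above.
kvfile = simplekeyvaluefile(repo.vfs, b'myextstate')
kvfile.write({b'version': b'1', b'inprogress': b'yes'},
             firstline=b'somebanner')
data = kvfile.read(firstlinenonkeyval=True)
# data maps '__firstline' -> 'somebanner', 'version' -> '1',
# 'inprogress' -> 'yes'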
1347 1369 _reportobsoletedsource = [
1348 1370 'debugobsolete',
1349 1371 'pull',
1350 1372 'push',
1351 1373 'serve',
1352 1374 'unbundle',
1353 1375 ]
1354 1376
1355 1377 _reportnewcssource = [
1356 1378 'pull',
1357 1379 'unbundle',
1358 1380 ]
1359 1381
1360 1382 def prefetchfiles(repo, revs, match):
1361 1383 """Invokes the registered file prefetch functions, allowing extensions to
1362 1384 ensure the corresponding files are available locally, before the command
1363 1385 uses them."""
1364 1386 if match:
1365 1387 # The command itself will complain about files that don't exist, so
1366 1388 # don't duplicate the message.
1367 1389 match = matchmod.badmatch(match, lambda fn, msg: None)
1368 1390 else:
1369 1391 match = matchall(repo)
1370 1392
1371 1393 fileprefetchhooks(repo, revs, match)
1372 1394
1373 1395 # a list of (repo, revs, match) prefetch functions
1374 1396 fileprefetchhooks = util.hooks()
1375 1397
1376 1398 # A marker that tells the evolve extension to suppress its own reporting
1377 1399 _reportstroubledchangesets = True
1378 1400
1379 1401 def registersummarycallback(repo, otr, txnname=''):
1380 1402 """register a callback to issue a summary after the transaction is closed
1381 1403 """
1382 1404 def txmatch(sources):
1383 1405 return any(txnname.startswith(source) for source in sources)
1384 1406
1385 1407 categories = []
1386 1408
1387 1409 def reportsummary(func):
1388 1410 """decorator for report callbacks."""
1389 1411 # The repoview life cycle is shorter than the one of the actual
1390 1412 # underlying repository. So the filtered object can die before the
1391 1413 # weakref is used leading to troubles. We keep a reference to the
1392 1414 # unfiltered object and restore the filtering when retrieving the
1393 1415 # repository through the weakref.
1394 1416 filtername = repo.filtername
1395 1417 reporef = weakref.ref(repo.unfiltered())
1396 1418 def wrapped(tr):
1397 1419 repo = reporef()
1398 1420 if filtername:
1399 1421 repo = repo.filtered(filtername)
1400 1422 func(repo, tr)
1401 1423 newcat = '%02i-txnreport' % len(categories)
1402 1424 otr.addpostclose(newcat, wrapped)
1403 1425 categories.append(newcat)
1404 1426 return wrapped
1405 1427
1406 1428 if txmatch(_reportobsoletedsource):
1407 1429 @reportsummary
1408 1430 def reportobsoleted(repo, tr):
1409 1431 obsoleted = obsutil.getobsoleted(repo, tr)
1410 1432 if obsoleted:
1411 1433 repo.ui.status(_('obsoleted %i changesets\n')
1412 1434 % len(obsoleted))
1413 1435
1414 1436 if (obsolete.isenabled(repo, obsolete.createmarkersopt) and
1415 1437 repo.ui.configbool('experimental', 'evolution.report-instabilities')):
1416 1438 instabilitytypes = [
1417 1439 ('orphan', 'orphan'),
1418 1440 ('phase-divergent', 'phasedivergent'),
1419 1441 ('content-divergent', 'contentdivergent'),
1420 1442 ]
1421 1443
1422 1444 def getinstabilitycounts(repo):
1423 1445 filtered = repo.changelog.filteredrevs
1424 1446 counts = {}
1425 1447 for instability, revset in instabilitytypes:
1426 1448 counts[instability] = len(set(obsolete.getrevs(repo, revset)) -
1427 1449 filtered)
1428 1450 return counts
1429 1451
1430 1452 oldinstabilitycounts = getinstabilitycounts(repo)
1431 1453 @reportsummary
1432 1454 def reportnewinstabilities(repo, tr):
1433 1455 newinstabilitycounts = getinstabilitycounts(repo)
1434 1456 for instability, revset in instabilitytypes:
1435 1457 delta = (newinstabilitycounts[instability] -
1436 1458 oldinstabilitycounts[instability])
1437 1459 if delta > 0:
1438 1460 repo.ui.warn(_('%i new %s changesets\n') %
1439 1461 (delta, instability))
1440 1462
1441 1463 if txmatch(_reportnewcssource):
1442 1464 @reportsummary
1443 1465 def reportnewcs(repo, tr):
1444 1466 """Report the range of new revisions pulled/unbundled."""
1445 1467 newrevs = tr.changes.get('revs', xrange(0, 0))
1446 1468 if not newrevs:
1447 1469 return
1448 1470
1449 1471 # Compute the bounds of new revisions' range, excluding obsoletes.
1450 1472 unfi = repo.unfiltered()
1451 1473 revs = unfi.revs('%ld and not obsolete()', newrevs)
1452 1474 if not revs:
1453 1475 # Got only obsoletes.
1454 1476 return
1455 1477 minrev, maxrev = repo[revs.min()], repo[revs.max()]
1456 1478
1457 1479 if minrev == maxrev:
1458 1480 revrange = minrev
1459 1481 else:
1460 1482 revrange = '%s:%s' % (minrev, maxrev)
1461 1483 repo.ui.status(_('new changesets %s\n') % revrange)
1462 1484
1463 1485 def nodesummaries(repo, nodes, maxnumnodes=4):
1464 1486 if len(nodes) <= maxnumnodes or repo.ui.verbose:
1465 1487 return ' '.join(short(h) for h in nodes)
1466 1488 first = ' '.join(short(h) for h in nodes[:maxnumnodes])
1467 1489 return _("%s and %d others") % (first, len(nodes) - maxnumnodes)
1468 1490
1469 1491 def enforcesinglehead(repo, tr, desc):
1470 1492 """check that no named branch has multiple heads"""
1471 1493 if desc in ('strip', 'repair'):
1472 1494 # skip the logic during strip
1473 1495 return
1474 1496 visible = repo.filtered('visible')
1475 1497 # possible improvement: we could restrict the check to affected branch
1476 1498 for name, heads in visible.branchmap().iteritems():
1477 1499 if len(heads) > 1:
1478 1500 msg = _('rejecting multiple heads on branch "%s"')
1479 1501 msg %= name
1480 1502 hint = _('%d heads: %s')
1481 1503 hint %= (len(heads), nodesummaries(repo, heads))
1482 1504 raise error.Abort(msg, hint=hint)
1483 1505
1484 1506 def wrapconvertsink(sink):
1485 1507 """Allow extensions to wrap the sink returned by convcmd.convertsink()
1486 1508 before it is used, whether or not the convert extension was formally loaded.
1487 1509 """
1488 1510 return sink
1489 1511
1490 1512 def unhidehashlikerevs(repo, specs, hiddentype):
1491 1513 """parse the user specs and unhide changesets whose hash or revision number
1492 1514 is passed.
1493 1515
1494 1516 hiddentype can be: 1) 'warn': warn while unhiding changesets
1495 1517 2) 'nowarn': don't warn while unhiding changesets
1496 1518
1497 1519 returns a repo object with the required changesets unhidden
1498 1520 """
1499 1521 if not repo.filtername or not repo.ui.configbool('experimental',
1500 1522 'directaccess'):
1501 1523 return repo
1502 1524
1503 1525 if repo.filtername not in ('visible', 'visible-hidden'):
1504 1526 return repo
1505 1527
1506 1528 symbols = set()
1507 1529 for spec in specs:
1508 1530 try:
1509 1531 tree = revsetlang.parse(spec)
1510 1532 except error.ParseError: # will be reported by scmutil.revrange()
1511 1533 continue
1512 1534
1513 1535 symbols.update(revsetlang.gethashlikesymbols(tree))
1514 1536
1515 1537 if not symbols:
1516 1538 return repo
1517 1539
1518 1540 revs = _getrevsfromsymbols(repo, symbols)
1519 1541
1520 1542 if not revs:
1521 1543 return repo
1522 1544
1523 1545 if hiddentype == 'warn':
1524 1546 unfi = repo.unfiltered()
1525 1547 revstr = ", ".join([pycompat.bytestr(unfi[l]) for l in revs])
1526 1548 repo.ui.warn(_("warning: accessing hidden changesets for write "
1527 1549 "operation: %s\n") % revstr)
1528 1550
1529 1551 # we have to use a new filtername to separate the branch/tags caches until
1530 1552 # we can disable these caches when revisions are dynamically pinned.
1531 1553 return repo.filtered('visible-hidden', revs)
1532 1554
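# Editorial sketch, not part of this change: the behaviour above is gated on
# configuration of the form
#
#   [experimental]
#   directaccess = True
#   directaccess.revnums = True    # also allow bare revision numbers
#
# and hiddentype='warn' is what produces the "accessing hidden changesets"
# warning shown above.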
1533 1555 def _getrevsfromsymbols(repo, symbols):
1534 1556 """parse the list of symbols and returns a set of revision numbers of hidden
1535 1557 changesets present in symbols"""
1536 1558 revs = set()
1537 1559 unfi = repo.unfiltered()
1538 1560 unficl = unfi.changelog
1539 1561 cl = repo.changelog
1540 1562 tiprev = len(unficl)
1541 1563 allowrevnums = repo.ui.configbool('experimental', 'directaccess.revnums')
1542 1564 for s in symbols:
1543 1565 try:
1544 1566 n = int(s)
1545 1567 if n <= tiprev:
1546 1568 if not allowrevnums:
1547 1569 continue
1548 1570 else:
1549 1571 if n not in cl:
1550 1572 revs.add(n)
1551 1573 continue
1552 1574 except ValueError:
1553 1575 pass
1554 1576
1555 1577 try:
1556 1578 s = resolvehexnodeidprefix(unfi, s)
1557 1579 except (error.LookupError, error.WdirUnsupported):
1558 1580 s = None
1559 1581
1560 1582 if s is not None:
1561 1583 rev = unficl.rev(s)
1562 1584 if rev not in cl:
1563 1585 revs.add(rev)
1564 1586
1565 1587 return revs