revlog: make shortest() take a full binary nodeid (API)...
Martin von Zweigbergk
r37785:44d1959a default
@@ -1,2593 +1,2594
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .thirdparty import (
39 39 attr,
40 40 )
41 41 from . import (
42 42 ancestor,
43 43 error,
44 44 mdiff,
45 45 policy,
46 46 pycompat,
47 47 templatefilters,
48 48 util,
49 49 )
50 50 from .utils import (
51 51 stringutil,
52 52 )
53 53
54 54 parsers = policy.importmod(r'parsers')
55 55
56 56 # Aliased for performance.
57 57 _zlibdecompress = zlib.decompress
58 58
59 59 # revlog header flags
60 60 REVLOGV0 = 0
61 61 REVLOGV1 = 1
62 62 # Dummy value until file format is finalized.
63 63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 64 REVLOGV2 = 0xDEAD
65 65 FLAG_INLINE_DATA = (1 << 16)
66 66 FLAG_GENERALDELTA = (1 << 17)
67 67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72 72
73 73 # revlog index flags
74 74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 77 REVIDX_DEFAULT_FLAGS = 0
78 78 # stable order in which flags need to be processed and their processors applied
79 79 REVIDX_FLAGS_ORDER = [
80 80 REVIDX_ISCENSORED,
81 81 REVIDX_ELLIPSIS,
82 82 REVIDX_EXTSTORED,
83 83 ]
84 84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 85 # bitmask for flags that could cause rawdata content change
86 86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87 87
88 88 # max size of revlog with inline data
89 89 _maxinline = 131072
90 90 _chunksize = 1048576
91 91
92 92 RevlogError = error.RevlogError
93 93 LookupError = error.LookupError
94 94 CensoredNodeError = error.CensoredNodeError
95 95 ProgrammingError = error.ProgrammingError
96 96
97 97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 98 _flagprocessors = {
99 99 REVIDX_ISCENSORED: None,
100 100 }
101 101
102 102 _mdre = re.compile('\1\n')
103 103 def parsemeta(text):
104 104 """return (metadatadict, metadatasize)"""
105 105 # text can be buffer, so we can't use .startswith or .index
106 106 if text[:2] != '\1\n':
107 107 return None, None
108 108 s = _mdre.search(text, 2).start()
109 109 mtext = text[2:s]
110 110 meta = {}
111 111 for l in mtext.splitlines():
112 112 k, v = l.split(": ", 1)
113 113 meta[k] = v
114 114 return meta, (s + 2)
115 115
116 116 def packmeta(meta, text):
117 117 keys = sorted(meta)
118 118 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
119 119 return "\1\n%s\1\n%s" % (metatext, text)
120 120
121 121 def _censoredtext(text):
122 122 m, offs = parsemeta(text)
123 123 return m and "censored" in m
124 124
125 125 def addflagprocessor(flag, processor):
126 126 """Register a flag processor on a revision data flag.
127 127
128 128 Invariant:
129 129 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
130 130 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
131 131 - Only one flag processor can be registered on a specific flag.
132 132 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
133 133 following signatures:
134 134 - (read) f(self, rawtext) -> text, bool
135 135 - (write) f(self, text) -> rawtext, bool
136 136 - (raw) f(self, rawtext) -> bool
137 137 "text" is presented to the user. "rawtext" is stored in revlog data, not
138 138 directly visible to the user.
139 139 The boolean returned by these transforms is used to determine whether
140 140 the returned text can be used for hash integrity checking. For example,
141 141 if "write" returns False, then "text" is used to generate the hash. If
142 142 "write" returns True, that basically means the "rawtext" returned by "write"
143 143 should be used to generate the hash. Usually, "write" and "read" return
144 144 different booleans, and "raw" returns the same boolean as "write".
145 145
146 146 Note: The 'raw' transform is used for changegroup generation and in some
147 147 debug commands. In this case the transform only indicates whether the
148 148 contents can be used for hash integrity checks.
149 149 """
150 150 if not flag & REVIDX_KNOWN_FLAGS:
151 151 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
152 152 raise ProgrammingError(msg)
153 153 if flag not in REVIDX_FLAGS_ORDER:
154 154 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
155 155 raise ProgrammingError(msg)
156 156 if flag in _flagprocessors:
157 157 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
158 158 raise error.Abort(msg)
159 159 _flagprocessors[flag] = processor
160 160
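# A minimal sketch of registering a flag processor per the docstring above;
# the no-op transforms and the use of REVIDX_EXTSTORED here are illustrative
# assumptions, not part of revlog.py. The tuple order is (read, write, raw).
def _noopread(self, rawtext):
    return rawtext, False
def _noopwrite(self, text):
    return text, False
def _noopraw(self, rawtext):
    return False
if REVIDX_EXTSTORED not in _flagprocessors:
    addflagprocessor(REVIDX_EXTSTORED, (_noopread, _noopwrite, _noopraw))
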
161 161 def getoffset(q):
162 162 return int(q >> 16)
163 163
164 164 def gettype(q):
165 165 return int(q & 0xFFFF)
166 166
167 167 def offset_type(offset, type):
168 168 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
169 169 raise ValueError('unknown revlog index flags')
170 170 return int(int(offset) << 16 | type)
171 171
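# Worked example of the packing above (illustrative, not part of revlog.py):
# the low 16 bits of the packed value hold the flags, the rest the byte offset.
_packed = offset_type(4096, REVIDX_ISCENSORED)
assert getoffset(_packed) == 4096
assert gettype(_packed) == REVIDX_ISCENSORED
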
172 172 _nullhash = hashlib.sha1(nullid)
173 173
174 174 def hash(text, p1, p2):
175 175 """generate a hash from the given text and its parent hashes
176 176
177 177 This hash combines both the current file contents and its history
178 178 in a manner that makes it easy to distinguish nodes with the same
179 179 content in the revision graph.
180 180 """
181 181 # As of now, if one of the parent nodes is null, p2 is null
182 182 if p2 == nullid:
183 183 # deep copy of a hash is faster than creating one
184 184 s = _nullhash.copy()
185 185 s.update(p1)
186 186 else:
187 187 # none of the parent nodes are nullid
188 188 if p1 < p2:
189 189 a = p1
190 190 b = p2
191 191 else:
192 192 a = p2
193 193 b = p1
194 194 s = hashlib.sha1(a)
195 195 s.update(b)
196 196 s.update(text)
197 197 return s.digest()
198 198
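# Equivalent formulation of hash() above (illustrative, not part of
# revlog.py): because nullid sorts lowest, the result is always
# sha1(min(p1, p2) + max(p1, p2) + text).
def _hashexample(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()
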
199 199 def _trimchunk(revlog, revs, startidx, endidx=None):
200 200 """returns revs[startidx:endidx] without empty trailing revs
201 201 """
202 202 length = revlog.length
203 203
204 204 if endidx is None:
205 205 endidx = len(revs)
206 206
207 207 # Trim empty revs at the end, but never the very first revision of a chain
208 208 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
209 209 endidx -= 1
210 210
211 211 return revs[startidx:endidx]
212 212
213 213 def _slicechunk(revlog, revs):
214 214 """slice revs to reduce the amount of unrelated data to be read from disk.
215 215
216 216 ``revs`` is sliced into groups that should each be read in one go.
217 217 Assumes that revs are sorted.
218 218 """
219 219 start = revlog.start
220 220 length = revlog.length
221 221
222 222 if len(revs) <= 1:
223 223 yield revs
224 224 return
225 225
226 226 startbyte = start(revs[0])
227 227 endbyte = start(revs[-1]) + length(revs[-1])
228 228 readdata = deltachainspan = endbyte - startbyte
229 229
230 230 chainpayload = sum(length(r) for r in revs)
231 231
232 232 if deltachainspan:
233 233 density = chainpayload / float(deltachainspan)
234 234 else:
235 235 density = 1.0
236 236
237 237 # Store the gaps in a heap to have them sorted by decreasing size
238 238 gapsheap = []
239 239 heapq.heapify(gapsheap)
240 240 prevend = None
241 241 for i, rev in enumerate(revs):
242 242 revstart = start(rev)
243 243 revlen = length(rev)
244 244
245 245 # Skip empty revisions to form larger holes
246 246 if revlen == 0:
247 247 continue
248 248
249 249 if prevend is not None:
250 250 gapsize = revstart - prevend
251 251 # only consider holes that are large enough
252 252 if gapsize > revlog._srmingapsize:
253 253 heapq.heappush(gapsheap, (-gapsize, i))
254 254
255 255 prevend = revstart + revlen
256 256
257 257 # Collect the indices of the largest holes until the density is acceptable
258 258 indicesheap = []
259 259 heapq.heapify(indicesheap)
260 260 while gapsheap and density < revlog._srdensitythreshold:
261 261 oppgapsize, gapidx = heapq.heappop(gapsheap)
262 262
263 263 heapq.heappush(indicesheap, gapidx)
264 264
265 265 # the gap sizes are stored as negatives to be sorted decreasingly
266 266 # by the heap
267 267 readdata -= (-oppgapsize)
268 268 if readdata > 0:
269 269 density = chainpayload / float(readdata)
270 270 else:
271 271 density = 1.0
272 272
273 273 # Cut the revs at collected indices
274 274 previdx = 0
275 275 while indicesheap:
276 276 idx = heapq.heappop(indicesheap)
277 277
278 278 chunk = _trimchunk(revlog, revs, previdx, idx)
279 279 if chunk:
280 280 yield chunk
281 281
282 282 previdx = idx
283 283
284 284 chunk = _trimchunk(revlog, revs, previdx)
285 285 if chunk:
286 286 yield chunk
287 287
288 288 @attr.s(slots=True, frozen=True)
289 289 class _deltainfo(object):
290 290 distance = attr.ib()
291 291 deltalen = attr.ib()
292 292 data = attr.ib()
293 293 base = attr.ib()
294 294 chainbase = attr.ib()
295 295 chainlen = attr.ib()
296 296 compresseddeltalen = attr.ib()
297 297
298 298 class _deltacomputer(object):
299 299 def __init__(self, revlog):
300 300 self.revlog = revlog
301 301
302 302 def _getcandidaterevs(self, p1, p2, cachedelta):
303 303 """
304 304 Provides revisions that present an interest to be diffed against,
305 305 grouped by level of easiness.
306 306 """
307 307 revlog = self.revlog
308 308 curr = len(revlog)
309 309 prev = curr - 1
310 310 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
311 311
312 312 # should we try to build a delta?
313 313 if prev != nullrev and revlog.storedeltachains:
314 314 tested = set()
315 315 # This condition is true most of the time when processing
316 316 # changegroup data into a generaldelta repo. The only time it
317 317 # isn't true is if this is the first revision in a delta chain
318 318 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
319 319 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
320 320 # Assume what we received from the server is a good choice
321 321 # build delta will reuse the cache
322 322 yield (cachedelta[0],)
323 323 tested.add(cachedelta[0])
324 324
325 325 if revlog._generaldelta:
326 326 # exclude already lazy tested base if any
327 327 parents = [p for p in (p1r, p2r)
328 328 if p != nullrev and p not in tested]
329 329 if parents and not revlog._aggressivemergedeltas:
330 330 # Pick whichever parent is closer to us (to minimize the
331 331 # chance of having to build a fulltext).
332 332 parents = [max(parents)]
333 333 tested.update(parents)
334 334 yield parents
335 335
336 336 if prev not in tested:
337 337 # other approaches failed; try against prev to hopefully save us a
338 338 # fulltext.
339 339 yield (prev,)
340 340
341 341 def buildtext(self, revinfo, fh):
342 342 """Builds a fulltext version of a revision
343 343
344 344 revinfo: _revisioninfo instance that contains all needed info
345 345 fh: file handle to either the .i or the .d revlog file,
346 346 depending on whether it is inlined or not
347 347 """
348 348 btext = revinfo.btext
349 349 if btext[0] is not None:
350 350 return btext[0]
351 351
352 352 revlog = self.revlog
353 353 cachedelta = revinfo.cachedelta
354 354 flags = revinfo.flags
355 355 node = revinfo.node
356 356
357 357 baserev = cachedelta[0]
358 358 delta = cachedelta[1]
359 359 # special case deltas which replace entire base; no need to decode
360 360 # base revision. this neatly avoids censored bases, which throw when
361 361 # they're decoded.
362 362 hlen = struct.calcsize(">lll")
363 363 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
364 364 len(delta) - hlen):
365 365 btext[0] = delta[hlen:]
366 366 else:
367 367 # deltabase is rawtext before changed by flag processors, which is
368 368 # equivalent to non-raw text
369 369 basetext = revlog.revision(baserev, _df=fh, raw=False)
370 370 btext[0] = mdiff.patch(basetext, delta)
371 371
372 372 try:
373 373 res = revlog._processflags(btext[0], flags, 'read', raw=True)
374 374 btext[0], validatehash = res
375 375 if validatehash:
376 376 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
377 377 if flags & REVIDX_ISCENSORED:
378 378 raise RevlogError(_('node %s is not censored') % node)
379 379 except CensoredNodeError:
380 380 # must pass the censored index flag to add censored revisions
381 381 if not flags & REVIDX_ISCENSORED:
382 382 raise
383 383 return btext[0]
384 384
385 385 def _builddeltadiff(self, base, revinfo, fh):
386 386 revlog = self.revlog
387 387 t = self.buildtext(revinfo, fh)
388 388 if revlog.iscensored(base):
389 389 # deltas based on a censored revision must replace the
390 390 # full content in one patch, so delta works everywhere
391 391 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
392 392 delta = header + t
393 393 else:
394 394 ptext = revlog.revision(base, _df=fh, raw=True)
395 395 delta = mdiff.textdiff(ptext, t)
396 396
397 397 return delta
398 398
399 399 def _builddeltainfo(self, revinfo, base, fh):
400 400 # can we use the cached delta?
401 401 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
402 402 delta = revinfo.cachedelta[1]
403 403 else:
404 404 delta = self._builddeltadiff(base, revinfo, fh)
405 405 revlog = self.revlog
406 406 header, data = revlog.compress(delta)
407 407 deltalen = len(header) + len(data)
408 408 chainbase = revlog.chainbase(base)
409 409 offset = revlog.end(len(revlog) - 1)
410 410 dist = deltalen + offset - revlog.start(chainbase)
411 411 if revlog._generaldelta:
412 412 deltabase = base
413 413 else:
414 414 deltabase = chainbase
415 415 chainlen, compresseddeltalen = revlog._chaininfo(base)
416 416 chainlen += 1
417 417 compresseddeltalen += deltalen
418 418 return _deltainfo(dist, deltalen, (header, data), deltabase,
419 419 chainbase, chainlen, compresseddeltalen)
420 420
421 421 def finddeltainfo(self, revinfo, fh):
422 422 """Find an acceptable delta against a candidate revision
423 423
424 424 revinfo: information about the revision (instance of _revisioninfo)
425 425 fh: file handle to either the .i or the .d revlog file,
426 426 depending on whether it is inlined or not
427 427
428 428 Returns the first acceptable candidate revision, as ordered by
429 429 _getcandidaterevs
430 430 """
431 431 cachedelta = revinfo.cachedelta
432 432 p1 = revinfo.p1
433 433 p2 = revinfo.p2
434 434 revlog = self.revlog
435 435
436 436 deltainfo = None
437 437 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
438 438 nominateddeltas = []
439 439 for candidaterev in candidaterevs:
440 440 # no delta for rawtext-changing revs (see "candelta" for why)
441 441 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
442 442 continue
443 443 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
444 444 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
445 445 nominateddeltas.append(candidatedelta)
446 446 if nominateddeltas:
447 447 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
448 448 break
449 449
450 450 return deltainfo
451 451
452 452 @attr.s(slots=True, frozen=True)
453 453 class _revisioninfo(object):
454 454 """Information about a revision that allows building its fulltext
455 455 node: expected hash of the revision
456 456 p1, p2: parent revs of the revision
457 457 btext: built text cache consisting of a one-element list
458 458 cachedelta: (baserev, uncompressed_delta) or None
459 459 flags: flags associated with the revision storage
460 460
461 461 One of btext[0] or cachedelta must be set.
462 462 """
463 463 node = attr.ib()
464 464 p1 = attr.ib()
465 465 p2 = attr.ib()
466 466 btext = attr.ib()
467 467 textlen = attr.ib()
468 468 cachedelta = attr.ib()
469 469 flags = attr.ib()
470 470
471 471 # index v0:
472 472 # 4 bytes: offset
473 473 # 4 bytes: compressed length
474 474 # 4 bytes: base rev
475 475 # 4 bytes: link rev
476 476 # 20 bytes: parent 1 nodeid
477 477 # 20 bytes: parent 2 nodeid
478 478 # 20 bytes: nodeid
479 479 indexformatv0 = struct.Struct(">4l20s20s20s")
480 480 indexformatv0_pack = indexformatv0.pack
481 481 indexformatv0_unpack = indexformatv0.unpack
482 482
483 483 class revlogoldio(object):
484 484 def __init__(self):
485 485 self.size = indexformatv0.size
486 486
487 487 def parseindex(self, data, inline):
488 488 s = self.size
489 489 index = []
490 490 nodemap = {nullid: nullrev}
491 491 n = off = 0
492 492 l = len(data)
493 493 while off + s <= l:
494 494 cur = data[off:off + s]
495 495 off += s
496 496 e = indexformatv0_unpack(cur)
497 497 # transform to revlogv1 format
498 498 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
499 499 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
500 500 index.append(e2)
501 501 nodemap[e[6]] = n
502 502 n += 1
503 503
504 504 # add the magic null revision at -1
505 505 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
506 506
507 507 return index, nodemap, None
508 508
509 509 def packentry(self, entry, node, version, rev):
510 510 if gettype(entry[0]):
511 511 raise RevlogError(_('index entry flags need revlog version 1'))
512 512 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
513 513 node(entry[5]), node(entry[6]), entry[7])
514 514 return indexformatv0_pack(*e2)
515 515
516 516 # index ng:
517 517 # 6 bytes: offset
518 518 # 2 bytes: flags
519 519 # 4 bytes: compressed length
520 520 # 4 bytes: uncompressed length
521 521 # 4 bytes: base rev
522 522 # 4 bytes: link rev
523 523 # 4 bytes: parent 1 rev
524 524 # 4 bytes: parent 2 rev
525 525 # 32 bytes: nodeid
526 526 indexformatng = struct.Struct(">Qiiiiii20s12x")
527 527 indexformatng_pack = indexformatng.pack
528 528 versionformat = struct.Struct(">I")
529 529 versionformat_pack = versionformat.pack
530 530 versionformat_unpack = versionformat.unpack
531 531
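# Sketch of decoding one v1 ("ng") index entry per the layout above; the
# ``entry`` bytes are hypothetical and this helper is not part of revlog.py.
def _unpackngentry(entry):
    (offset_flags, complen, uncomplen, baserev,
     linkrev, p1rev, p2rev, nodeid) = indexformatng.unpack(entry)
    # the first 8-byte field packs 6 bytes of offset and 2 bytes of flags
    return getoffset(offset_flags), gettype(offset_flags), complen, nodeid
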
532 532 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
533 533 # signed integer)
534 534 _maxentrysize = 0x7fffffff
535 535
536 536 class revlogio(object):
537 537 def __init__(self):
538 538 self.size = indexformatng.size
539 539
540 540 def parseindex(self, data, inline):
541 541 # call the C implementation to parse the index data
542 542 index, cache = parsers.parse_index2(data, inline)
543 543 return index, getattr(index, 'nodemap', None), cache
544 544
545 545 def packentry(self, entry, node, version, rev):
546 546 p = indexformatng_pack(*entry)
547 547 if rev == 0:
548 548 p = versionformat_pack(version) + p[4:]
549 549 return p
550 550
551 551 class revlog(object):
552 552 """
553 553 the underlying revision storage object
554 554
555 555 A revlog consists of two parts, an index and the revision data.
556 556
557 557 The index is a file with a fixed record size containing
558 558 information on each revision, including its nodeid (hash), the
559 559 nodeids of its parents, the position and offset of its data within
560 560 the data file, and the revision it's based on. Finally, each entry
561 561 contains a linkrev entry that can serve as a pointer to external
562 562 data.
563 563
564 564 The revision data itself is a linear collection of data chunks.
565 565 Each chunk represents a revision and is usually represented as a
566 566 delta against the previous chunk. To bound lookup time, runs of
567 567 deltas are limited to about 2 times the length of the original
568 568 version data. This makes retrieval of a version proportional to
569 569 its size, or O(1) relative to the number of revisions.
570 570
571 571 Both pieces of the revlog are written to in an append-only
572 572 fashion, which means we never need to rewrite a file to insert or
573 573 remove data, and can use some simple techniques to avoid the need
574 574 for locking while reading.
575 575
576 576 If checkambig, indexfile is opened with checkambig=True at
577 577 writing, to avoid file stat ambiguity.
578 578
579 579 If mmaplargeindex is True, and an mmapindexthreshold is set, the
580 580 index will be mmapped rather than read if it is larger than the
581 581 configured threshold.
582 582
583 583 If censorable is True, the revlog can have censored revisions.
584 584 """
585 585 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
586 586 mmaplargeindex=False, censorable=False):
587 587 """
588 588 create a revlog object
589 589
590 590 opener is a function that abstracts the file opening operation
591 591 and can be used to implement COW semantics or the like.
592 592 """
593 593 self.indexfile = indexfile
594 594 self.datafile = datafile or (indexfile[:-2] + ".d")
595 595 self.opener = opener
596 596 # When True, indexfile is opened with checkambig=True at writing, to
597 597 # avoid file stat ambiguity.
598 598 self._checkambig = checkambig
599 599 self._censorable = censorable
600 600 # 3-tuple of (node, rev, text) for a raw revision.
601 601 self._cache = None
602 602 # Maps rev to chain base rev.
603 603 self._chainbasecache = util.lrucachedict(100)
604 604 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
605 605 self._chunkcache = (0, '')
606 606 # How much data to read and cache into the raw revlog data cache.
607 607 self._chunkcachesize = 65536
608 608 self._maxchainlen = None
609 609 self._aggressivemergedeltas = False
610 610 self.index = []
611 611 # Mapping of partial identifiers to full nodes.
612 612 self._pcache = {}
613 613 # Mapping of revision integer to full node.
614 614 self._nodecache = {nullid: nullrev}
615 615 self._nodepos = None
616 616 self._compengine = 'zlib'
617 617 self._maxdeltachainspan = -1
618 618 self._withsparseread = False
619 619 self._srdensitythreshold = 0.25
620 620 self._srmingapsize = 262144
621 621
622 622 mmapindexthreshold = None
623 623 v = REVLOG_DEFAULT_VERSION
624 624 opts = getattr(opener, 'options', None)
625 625 if opts is not None:
626 626 if 'revlogv2' in opts:
627 627 # version 2 revlogs always use generaldelta.
628 628 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
629 629 elif 'revlogv1' in opts:
630 630 if 'generaldelta' in opts:
631 631 v |= FLAG_GENERALDELTA
632 632 else:
633 633 v = 0
634 634 if 'chunkcachesize' in opts:
635 635 self._chunkcachesize = opts['chunkcachesize']
636 636 if 'maxchainlen' in opts:
637 637 self._maxchainlen = opts['maxchainlen']
638 638 if 'aggressivemergedeltas' in opts:
639 639 self._aggressivemergedeltas = opts['aggressivemergedeltas']
640 640 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
641 641 if 'compengine' in opts:
642 642 self._compengine = opts['compengine']
643 643 if 'maxdeltachainspan' in opts:
644 644 self._maxdeltachainspan = opts['maxdeltachainspan']
645 645 if mmaplargeindex and 'mmapindexthreshold' in opts:
646 646 mmapindexthreshold = opts['mmapindexthreshold']
647 647 self._withsparseread = bool(opts.get('with-sparse-read', False))
648 648 if 'sparse-read-density-threshold' in opts:
649 649 self._srdensitythreshold = opts['sparse-read-density-threshold']
650 650 if 'sparse-read-min-gap-size' in opts:
651 651 self._srmingapsize = opts['sparse-read-min-gap-size']
652 652
653 653 if self._chunkcachesize <= 0:
654 654 raise RevlogError(_('revlog chunk cache size %r is not greater '
655 655 'than 0') % self._chunkcachesize)
656 656 elif self._chunkcachesize & (self._chunkcachesize - 1):
657 657 raise RevlogError(_('revlog chunk cache size %r is not a power '
658 658 'of 2') % self._chunkcachesize)
659 659
660 660 indexdata = ''
661 661 self._initempty = True
662 662 try:
663 663 with self._indexfp() as f:
664 664 if (mmapindexthreshold is not None and
665 665 self.opener.fstat(f).st_size >= mmapindexthreshold):
666 666 indexdata = util.buffer(util.mmapread(f))
667 667 else:
668 668 indexdata = f.read()
669 669 if len(indexdata) > 0:
670 670 v = versionformat_unpack(indexdata[:4])[0]
671 671 self._initempty = False
672 672 except IOError as inst:
673 673 if inst.errno != errno.ENOENT:
674 674 raise
675 675
676 676 self.version = v
677 677 self._inline = v & FLAG_INLINE_DATA
678 678 self._generaldelta = v & FLAG_GENERALDELTA
679 679 flags = v & ~0xFFFF
680 680 fmt = v & 0xFFFF
681 681 if fmt == REVLOGV0:
682 682 if flags:
683 683 raise RevlogError(_('unknown flags (%#04x) in version %d '
684 684 'revlog %s') %
685 685 (flags >> 16, fmt, self.indexfile))
686 686 elif fmt == REVLOGV1:
687 687 if flags & ~REVLOGV1_FLAGS:
688 688 raise RevlogError(_('unknown flags (%#04x) in version %d '
689 689 'revlog %s') %
690 690 (flags >> 16, fmt, self.indexfile))
691 691 elif fmt == REVLOGV2:
692 692 if flags & ~REVLOGV2_FLAGS:
693 693 raise RevlogError(_('unknown flags (%#04x) in version %d '
694 694 'revlog %s') %
695 695 (flags >> 16, fmt, self.indexfile))
696 696 else:
697 697 raise RevlogError(_('unknown version (%d) in revlog %s') %
698 698 (fmt, self.indexfile))
699 699
700 700 self.storedeltachains = True
701 701
702 702 self._io = revlogio()
703 703 if self.version == REVLOGV0:
704 704 self._io = revlogoldio()
705 705 try:
706 706 d = self._io.parseindex(indexdata, self._inline)
707 707 except (ValueError, IndexError):
708 708 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
709 709 self.index, nodemap, self._chunkcache = d
710 710 if nodemap is not None:
711 711 self.nodemap = self._nodecache = nodemap
712 712 if not self._chunkcache:
713 713 self._chunkclear()
714 714 # revnum -> (chain-length, sum-delta-length)
715 715 self._chaininfocache = {}
716 716 # revlog header -> revlog compressor
717 717 self._decompressors = {}
718 718
719 719 @util.propertycache
720 720 def _compressor(self):
721 721 return util.compengines[self._compengine].revlogcompressor()
722 722
723 723 def _indexfp(self, mode='r'):
724 724 """file object for the revlog's index file"""
725 725 args = {r'mode': mode}
726 726 if mode != 'r':
727 727 args[r'checkambig'] = self._checkambig
728 728 if mode == 'w':
729 729 args[r'atomictemp'] = True
730 730 return self.opener(self.indexfile, **args)
731 731
732 732 def _datafp(self, mode='r'):
733 733 """file object for the revlog's data file"""
734 734 return self.opener(self.datafile, mode=mode)
735 735
736 736 @contextlib.contextmanager
737 737 def _datareadfp(self, existingfp=None):
738 738 """file object suitable to read data"""
739 739 if existingfp is not None:
740 740 yield existingfp
741 741 else:
742 742 if self._inline:
743 743 func = self._indexfp
744 744 else:
745 745 func = self._datafp
746 746 with func() as fp:
747 747 yield fp
748 748
749 749 def tip(self):
750 750 return self.node(len(self.index) - 2)
751 751 def __contains__(self, rev):
752 752 return 0 <= rev < len(self)
753 753 def __len__(self):
754 754 return len(self.index) - 1
755 755 def __iter__(self):
756 756 return iter(xrange(len(self)))
757 757 def revs(self, start=0, stop=None):
758 758 """iterate over all rev in this revlog (from start to stop)"""
759 759 step = 1
760 760 if stop is not None:
761 761 if start > stop:
762 762 step = -1
763 763 stop += step
764 764 else:
765 765 stop = len(self)
766 766 return xrange(start, stop, step)
767 767
768 768 @util.propertycache
769 769 def nodemap(self):
770 770 self.rev(self.node(0))
771 771 return self._nodecache
772 772
773 773 def hasnode(self, node):
774 774 try:
775 775 self.rev(node)
776 776 return True
777 777 except KeyError:
778 778 return False
779 779
780 780 def candelta(self, baserev, rev):
781 781 """whether two revisions (baserev, rev) can be delta-ed or not"""
782 782 # Disable delta if either rev requires a content-changing flag
783 783 # processor (ex. LFS). This is because such a flag processor can alter
784 784 # the rawtext content that the delta will be based on, and two clients
785 785 # could have the same revlog node with different flags (i.e. different
786 786 # rawtext contents) and the delta could be incompatible.
787 787 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
788 788 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
789 789 return False
790 790 return True
791 791
792 792 def clearcaches(self):
793 793 self._cache = None
794 794 self._chainbasecache.clear()
795 795 self._chunkcache = (0, '')
796 796 self._pcache = {}
797 797
798 798 try:
799 799 self._nodecache.clearcaches()
800 800 except AttributeError:
801 801 self._nodecache = {nullid: nullrev}
802 802 self._nodepos = None
803 803
804 804 def rev(self, node):
805 805 try:
806 806 return self._nodecache[node]
807 807 except TypeError:
808 808 raise
809 809 except RevlogError:
810 810 # parsers.c radix tree lookup failed
811 811 if node == wdirid or node in wdirfilenodeids:
812 812 raise error.WdirUnsupported
813 813 raise LookupError(node, self.indexfile, _('no node'))
814 814 except KeyError:
815 815 # pure python cache lookup failed
816 816 n = self._nodecache
817 817 i = self.index
818 818 p = self._nodepos
819 819 if p is None:
820 820 p = len(i) - 2
821 821 else:
822 822 assert p < len(i)
823 823 for r in xrange(p, -1, -1):
824 824 v = i[r][7]
825 825 n[v] = r
826 826 if v == node:
827 827 self._nodepos = r - 1
828 828 return r
829 829 if node == wdirid or node in wdirfilenodeids:
830 830 raise error.WdirUnsupported
831 831 raise LookupError(node, self.indexfile, _('no node'))
832 832
833 833 # Accessors for index entries.
834 834
835 835 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
836 836 # are flags.
837 837 def start(self, rev):
838 838 return int(self.index[rev][0] >> 16)
839 839
840 840 def flags(self, rev):
841 841 return self.index[rev][0] & 0xFFFF
842 842
843 843 def length(self, rev):
844 844 return self.index[rev][1]
845 845
846 846 def rawsize(self, rev):
847 847 """return the length of the uncompressed text for a given revision"""
848 848 l = self.index[rev][2]
849 849 if l >= 0:
850 850 return l
851 851
852 852 t = self.revision(rev, raw=True)
853 853 return len(t)
854 854
855 855 def size(self, rev):
856 856 """length of non-raw text (processed by a "read" flag processor)"""
857 857 # fast path: if no "read" flag processor could change the content,
858 858 # size is rawsize. note: ELLIPSIS is known to not change the content.
859 859 flags = self.flags(rev)
860 860 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
861 861 return self.rawsize(rev)
862 862
863 863 return len(self.revision(rev, raw=False))
864 864
865 865 def chainbase(self, rev):
866 866 base = self._chainbasecache.get(rev)
867 867 if base is not None:
868 868 return base
869 869
870 870 index = self.index
871 871 base = index[rev][3]
872 872 while base != rev:
873 873 rev = base
874 874 base = index[rev][3]
875 875
876 876 self._chainbasecache[rev] = base
877 877 return base
878 878
879 879 def linkrev(self, rev):
880 880 return self.index[rev][4]
881 881
882 882 def parentrevs(self, rev):
883 883 try:
884 884 entry = self.index[rev]
885 885 except IndexError:
886 886 if rev == wdirrev:
887 887 raise error.WdirUnsupported
888 888 raise
889 889
890 890 return entry[5], entry[6]
891 891
892 892 def node(self, rev):
893 893 try:
894 894 return self.index[rev][7]
895 895 except IndexError:
896 896 if rev == wdirrev:
897 897 raise error.WdirUnsupported
898 898 raise
899 899
900 900 # Derived from index values.
901 901
902 902 def end(self, rev):
903 903 return self.start(rev) + self.length(rev)
904 904
905 905 def parents(self, node):
906 906 i = self.index
907 907 d = i[self.rev(node)]
908 908 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
909 909
910 910 def chainlen(self, rev):
911 911 return self._chaininfo(rev)[0]
912 912
913 913 def _chaininfo(self, rev):
914 914 chaininfocache = self._chaininfocache
915 915 if rev in chaininfocache:
916 916 return chaininfocache[rev]
917 917 index = self.index
918 918 generaldelta = self._generaldelta
919 919 iterrev = rev
920 920 e = index[iterrev]
921 921 clen = 0
922 922 compresseddeltalen = 0
923 923 while iterrev != e[3]:
924 924 clen += 1
925 925 compresseddeltalen += e[1]
926 926 if generaldelta:
927 927 iterrev = e[3]
928 928 else:
929 929 iterrev -= 1
930 930 if iterrev in chaininfocache:
931 931 t = chaininfocache[iterrev]
932 932 clen += t[0]
933 933 compresseddeltalen += t[1]
934 934 break
935 935 e = index[iterrev]
936 936 else:
937 937 # Add text length of base since decompressing that also takes
938 938 # work. For cache hits the length is already included.
939 939 compresseddeltalen += e[1]
940 940 r = (clen, compresseddeltalen)
941 941 chaininfocache[rev] = r
942 942 return r
943 943
944 944 def _deltachain(self, rev, stoprev=None):
945 945 """Obtain the delta chain for a revision.
946 946
947 947 ``stoprev`` specifies a revision to stop at. If not specified, we
948 948 stop at the base of the chain.
949 949
950 950 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
951 951 revs in ascending order and ``stopped`` is a bool indicating whether
952 952 ``stoprev`` was hit.
953 953 """
954 954 # Try C implementation.
955 955 try:
956 956 return self.index.deltachain(rev, stoprev, self._generaldelta)
957 957 except AttributeError:
958 958 pass
959 959
960 960 chain = []
961 961
962 962 # Alias to prevent attribute lookup in tight loop.
963 963 index = self.index
964 964 generaldelta = self._generaldelta
965 965
966 966 iterrev = rev
967 967 e = index[iterrev]
968 968 while iterrev != e[3] and iterrev != stoprev:
969 969 chain.append(iterrev)
970 970 if generaldelta:
971 971 iterrev = e[3]
972 972 else:
973 973 iterrev -= 1
974 974 e = index[iterrev]
975 975
976 976 if iterrev == stoprev:
977 977 stopped = True
978 978 else:
979 979 chain.append(iterrev)
980 980 stopped = False
981 981
982 982 chain.reverse()
983 983 return chain, stopped
984 984
985 985 def ancestors(self, revs, stoprev=0, inclusive=False):
986 986 """Generate the ancestors of 'revs' in reverse topological order.
987 987 Does not generate revs lower than stoprev.
988 988
989 989 See the documentation for ancestor.lazyancestors for more details."""
990 990
991 991 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
992 992 inclusive=inclusive)
993 993
994 994 def descendants(self, revs):
995 995 """Generate the descendants of 'revs' in revision order.
996 996
997 997 Yield a sequence of revision numbers starting with a child of
998 998 some rev in revs, i.e., each revision is *not* considered a
999 999 descendant of itself. Results are ordered by revision number (a
1000 1000 topological sort)."""
1001 1001 first = min(revs)
1002 1002 if first == nullrev:
1003 1003 for i in self:
1004 1004 yield i
1005 1005 return
1006 1006
1007 1007 seen = set(revs)
1008 1008 for i in self.revs(start=first + 1):
1009 1009 for x in self.parentrevs(i):
1010 1010 if x != nullrev and x in seen:
1011 1011 seen.add(i)
1012 1012 yield i
1013 1013 break
1014 1014
1015 1015 def findcommonmissing(self, common=None, heads=None):
1016 1016 """Return a tuple of the ancestors of common and the ancestors of heads
1017 1017 that are not ancestors of common. In revset terminology, we return the
1018 1018 tuple:
1019 1019
1020 1020 ::common, (::heads) - (::common)
1021 1021
1022 1022 The list is sorted by revision number, meaning it is
1023 1023 topologically sorted.
1024 1024
1025 1025 'heads' and 'common' are both lists of node IDs. If heads is
1026 1026 not supplied, uses all of the revlog's heads. If common is not
1027 1027 supplied, uses nullid."""
1028 1028 if common is None:
1029 1029 common = [nullid]
1030 1030 if heads is None:
1031 1031 heads = self.heads()
1032 1032
1033 1033 common = [self.rev(n) for n in common]
1034 1034 heads = [self.rev(n) for n in heads]
1035 1035
1036 1036 # we want the ancestors, but inclusive
1037 1037 class lazyset(object):
1038 1038 def __init__(self, lazyvalues):
1039 1039 self.addedvalues = set()
1040 1040 self.lazyvalues = lazyvalues
1041 1041
1042 1042 def __contains__(self, value):
1043 1043 return value in self.addedvalues or value in self.lazyvalues
1044 1044
1045 1045 def __iter__(self):
1046 1046 added = self.addedvalues
1047 1047 for r in added:
1048 1048 yield r
1049 1049 for r in self.lazyvalues:
1050 1050 if r not in added:
1051 1051 yield r
1052 1052
1053 1053 def add(self, value):
1054 1054 self.addedvalues.add(value)
1055 1055
1056 1056 def update(self, values):
1057 1057 self.addedvalues.update(values)
1058 1058
1059 1059 has = lazyset(self.ancestors(common))
1060 1060 has.add(nullrev)
1061 1061 has.update(common)
1062 1062
1063 1063 # take all ancestors from heads that aren't in has
1064 1064 missing = set()
1065 1065 visit = collections.deque(r for r in heads if r not in has)
1066 1066 while visit:
1067 1067 r = visit.popleft()
1068 1068 if r in missing:
1069 1069 continue
1070 1070 else:
1071 1071 missing.add(r)
1072 1072 for p in self.parentrevs(r):
1073 1073 if p not in has:
1074 1074 visit.append(p)
1075 1075 missing = list(missing)
1076 1076 missing.sort()
1077 1077 return has, [self.node(miss) for miss in missing]
1078 1078
1079 1079 def incrementalmissingrevs(self, common=None):
1080 1080 """Return an object that can be used to incrementally compute the
1081 1081 revision numbers of the ancestors of arbitrary sets that are not
1082 1082 ancestors of common. This is an ancestor.incrementalmissingancestors
1083 1083 object.
1084 1084
1085 1085 'common' is a list of revision numbers. If common is not supplied, uses
1086 1086 nullrev.
1087 1087 """
1088 1088 if common is None:
1089 1089 common = [nullrev]
1090 1090
1091 1091 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1092 1092
1093 1093 def findmissingrevs(self, common=None, heads=None):
1094 1094 """Return the revision numbers of the ancestors of heads that
1095 1095 are not ancestors of common.
1096 1096
1097 1097 More specifically, return a list of revision numbers corresponding to
1098 1098 nodes N such that every N satisfies the following constraints:
1099 1099
1100 1100 1. N is an ancestor of some node in 'heads'
1101 1101 2. N is not an ancestor of any node in 'common'
1102 1102
1103 1103 The list is sorted by revision number, meaning it is
1104 1104 topologically sorted.
1105 1105
1106 1106 'heads' and 'common' are both lists of revision numbers. If heads is
1107 1107 not supplied, uses all of the revlog's heads. If common is not
1108 1108 supplied, uses nullrev."""
1109 1109 if common is None:
1110 1110 common = [nullrev]
1111 1111 if heads is None:
1112 1112 heads = self.headrevs()
1113 1113
1114 1114 inc = self.incrementalmissingrevs(common=common)
1115 1115 return inc.missingancestors(heads)
1116 1116
1117 1117 def findmissing(self, common=None, heads=None):
1118 1118 """Return the ancestors of heads that are not ancestors of common.
1119 1119
1120 1120 More specifically, return a list of nodes N such that every N
1121 1121 satisfies the following constraints:
1122 1122
1123 1123 1. N is an ancestor of some node in 'heads'
1124 1124 2. N is not an ancestor of any node in 'common'
1125 1125
1126 1126 The list is sorted by revision number, meaning it is
1127 1127 topologically sorted.
1128 1128
1129 1129 'heads' and 'common' are both lists of node IDs. If heads is
1130 1130 not supplied, uses all of the revlog's heads. If common is not
1131 1131 supplied, uses nullid."""
1132 1132 if common is None:
1133 1133 common = [nullid]
1134 1134 if heads is None:
1135 1135 heads = self.heads()
1136 1136
1137 1137 common = [self.rev(n) for n in common]
1138 1138 heads = [self.rev(n) for n in heads]
1139 1139
1140 1140 inc = self.incrementalmissingrevs(common=common)
1141 1141 return [self.node(r) for r in inc.missingancestors(heads)]
1142 1142
1143 1143 def nodesbetween(self, roots=None, heads=None):
1144 1144 """Return a topological path from 'roots' to 'heads'.
1145 1145
1146 1146 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1147 1147 topologically sorted list of all nodes N that satisfy both of
1148 1148 these constraints:
1149 1149
1150 1150 1. N is a descendant of some node in 'roots'
1151 1151 2. N is an ancestor of some node in 'heads'
1152 1152
1153 1153 Every node is considered to be both a descendant and an ancestor
1154 1154 of itself, so every reachable node in 'roots' and 'heads' will be
1155 1155 included in 'nodes'.
1156 1156
1157 1157 'outroots' is the list of reachable nodes in 'roots', i.e., the
1158 1158 subset of 'roots' that is returned in 'nodes'. Likewise,
1159 1159 'outheads' is the subset of 'heads' that is also in 'nodes'.
1160 1160
1161 1161 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1162 1162 unspecified, uses nullid as the only root. If 'heads' is
1163 1163 unspecified, uses list of all of the revlog's heads."""
1164 1164 nonodes = ([], [], [])
1165 1165 if roots is not None:
1166 1166 roots = list(roots)
1167 1167 if not roots:
1168 1168 return nonodes
1169 1169 lowestrev = min([self.rev(n) for n in roots])
1170 1170 else:
1171 1171 roots = [nullid] # Everybody's a descendant of nullid
1172 1172 lowestrev = nullrev
1173 1173 if (lowestrev == nullrev) and (heads is None):
1174 1174 # We want _all_ the nodes!
1175 1175 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1176 1176 if heads is None:
1177 1177 # All nodes are ancestors, so the latest ancestor is the last
1178 1178 # node.
1179 1179 highestrev = len(self) - 1
1180 1180 # Set ancestors to None to signal that every node is an ancestor.
1181 1181 ancestors = None
1182 1182 # Set heads to an empty dictionary for later discovery of heads
1183 1183 heads = {}
1184 1184 else:
1185 1185 heads = list(heads)
1186 1186 if not heads:
1187 1187 return nonodes
1188 1188 ancestors = set()
1189 1189 # Turn heads into a dictionary so we can remove 'fake' heads.
1190 1190 # Also, later we will be using it to filter out the heads we can't
1191 1191 # find from roots.
1192 1192 heads = dict.fromkeys(heads, False)
1193 1193 # Start at the top and keep marking parents until we're done.
1194 1194 nodestotag = set(heads)
1195 1195 # Remember where the top was so we can use it as a limit later.
1196 1196 highestrev = max([self.rev(n) for n in nodestotag])
1197 1197 while nodestotag:
1198 1198 # grab a node to tag
1199 1199 n = nodestotag.pop()
1200 1200 # Never tag nullid
1201 1201 if n == nullid:
1202 1202 continue
1203 1203 # A node's revision number represents its place in a
1204 1204 # topologically sorted list of nodes.
1205 1205 r = self.rev(n)
1206 1206 if r >= lowestrev:
1207 1207 if n not in ancestors:
1208 1208 # If we are possibly a descendant of one of the roots
1209 1209 # and we haven't already been marked as an ancestor
1210 1210 ancestors.add(n) # Mark as ancestor
1211 1211 # Add non-nullid parents to list of nodes to tag.
1212 1212 nodestotag.update([p for p in self.parents(n) if
1213 1213 p != nullid])
1214 1214 elif n in heads: # We've seen it before, is it a fake head?
1215 1215 # So it is, real heads should not be the ancestors of
1216 1216 # any other heads.
1217 1217 heads.pop(n)
1218 1218 if not ancestors:
1219 1219 return nonodes
1220 1220 # Now that we have our set of ancestors, we want to remove any
1221 1221 # roots that are not ancestors.
1222 1222
1223 1223 # If one of the roots was nullid, everything is included anyway.
1224 1224 if lowestrev > nullrev:
1225 1225 # But, since we weren't, let's recompute the lowest rev to not
1226 1226 # include roots that aren't ancestors.
1227 1227
1228 1228 # Filter out roots that aren't ancestors of heads
1229 1229 roots = [root for root in roots if root in ancestors]
1230 1230 # Recompute the lowest revision
1231 1231 if roots:
1232 1232 lowestrev = min([self.rev(root) for root in roots])
1233 1233 else:
1234 1234 # No more roots? Return empty list
1235 1235 return nonodes
1236 1236 else:
1237 1237 # We are descending from nullid, and don't need to care about
1238 1238 # any other roots.
1239 1239 lowestrev = nullrev
1240 1240 roots = [nullid]
1241 1241 # Transform our roots list into a set.
1242 1242 descendants = set(roots)
1243 1243 # Also, keep the original roots so we can filter out roots that aren't
1244 1244 # 'real' roots (i.e. are descended from other roots).
1245 1245 roots = descendants.copy()
1246 1246 # Our topologically sorted list of output nodes.
1247 1247 orderedout = []
1248 1248 # Don't start at nullid since we don't want nullid in our output list,
1249 1249 # and if nullid shows up in descendants, empty parents will look like
1250 1250 # they're descendants.
1251 1251 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1252 1252 n = self.node(r)
1253 1253 isdescendant = False
1254 1254 if lowestrev == nullrev: # Everybody is a descendant of nullid
1255 1255 isdescendant = True
1256 1256 elif n in descendants:
1257 1257 # n is already a descendant
1258 1258 isdescendant = True
1259 1259 # This check only needs to be done here because all the roots
1260 1260 # will start being marked as descendants before the loop.
1261 1261 if n in roots:
1262 1262 # If n was a root, check if it's a 'real' root.
1263 1263 p = tuple(self.parents(n))
1264 1264 # If any of its parents are descendants, it's not a root.
1265 1265 if (p[0] in descendants) or (p[1] in descendants):
1266 1266 roots.remove(n)
1267 1267 else:
1268 1268 p = tuple(self.parents(n))
1269 1269 # A node is a descendant if either of its parents are
1270 1270 # descendants. (We seeded the descendants set with the roots
1271 1271 # up there, remember?)
1272 1272 if (p[0] in descendants) or (p[1] in descendants):
1273 1273 descendants.add(n)
1274 1274 isdescendant = True
1275 1275 if isdescendant and ((ancestors is None) or (n in ancestors)):
1276 1276 # Only include nodes that are both descendants and ancestors.
1277 1277 orderedout.append(n)
1278 1278 if (ancestors is not None) and (n in heads):
1279 1279 # We're trying to figure out which heads are reachable
1280 1280 # from roots.
1281 1281 # Mark this head as having been reached
1282 1282 heads[n] = True
1283 1283 elif ancestors is None:
1284 1284 # Otherwise, we're trying to discover the heads.
1285 1285 # Assume this is a head because if it isn't, the next step
1286 1286 # will eventually remove it.
1287 1287 heads[n] = True
1288 1288 # But, obviously its parents aren't.
1289 1289 for p in self.parents(n):
1290 1290 heads.pop(p, None)
1291 1291 heads = [head for head, flag in heads.iteritems() if flag]
1292 1292 roots = list(roots)
1293 1293 assert orderedout
1294 1294 assert roots
1295 1295 assert heads
1296 1296 return (orderedout, roots, heads)
1297 1297
1298 1298 def headrevs(self):
1299 1299 try:
1300 1300 return self.index.headrevs()
1301 1301 except AttributeError:
1302 1302 return self._headrevs()
1303 1303
1304 1304 def computephases(self, roots):
1305 1305 return self.index.computephasesmapsets(roots)
1306 1306
1307 1307 def _headrevs(self):
1308 1308 count = len(self)
1309 1309 if not count:
1310 1310 return [nullrev]
1311 1311 # we won't iterate over filtered revs, so nobody is a head at start
1312 1312 ishead = [0] * (count + 1)
1313 1313 index = self.index
1314 1314 for r in self:
1315 1315 ishead[r] = 1 # I may be a head
1316 1316 e = index[r]
1317 1317 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1318 1318 return [r for r, val in enumerate(ishead) if val]
1319 1319
1320 1320 def heads(self, start=None, stop=None):
1321 1321 """return the list of all nodes that have no children
1322 1322
1323 1323 if start is specified, only heads that are descendants of
1324 1324 start will be returned
1325 1325 if stop is specified, it will consider all the revs from stop
1326 1326 as if they had no children
1327 1327 """
1328 1328 if start is None and stop is None:
1329 1329 if not len(self):
1330 1330 return [nullid]
1331 1331 return [self.node(r) for r in self.headrevs()]
1332 1332
1333 1333 if start is None:
1334 1334 start = nullid
1335 1335 if stop is None:
1336 1336 stop = []
1337 1337 stoprevs = set([self.rev(n) for n in stop])
1338 1338 startrev = self.rev(start)
1339 1339 reachable = {startrev}
1340 1340 heads = {startrev}
1341 1341
1342 1342 parentrevs = self.parentrevs
1343 1343 for r in self.revs(start=startrev + 1):
1344 1344 for p in parentrevs(r):
1345 1345 if p in reachable:
1346 1346 if r not in stoprevs:
1347 1347 reachable.add(r)
1348 1348 heads.add(r)
1349 1349 if p in heads and p not in stoprevs:
1350 1350 heads.remove(p)
1351 1351
1352 1352 return [self.node(r) for r in heads]
1353 1353
1354 1354 def children(self, node):
1355 1355 """find the children of a given node"""
1356 1356 c = []
1357 1357 p = self.rev(node)
1358 1358 for r in self.revs(start=p + 1):
1359 1359 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1360 1360 if prevs:
1361 1361 for pr in prevs:
1362 1362 if pr == p:
1363 1363 c.append(self.node(r))
1364 1364 elif p == nullrev:
1365 1365 c.append(self.node(r))
1366 1366 return c
1367 1367
1368 1368 def descendant(self, start, end):
1369 1369 if start == nullrev:
1370 1370 return True
1371 1371 for i in self.descendants([start]):
1372 1372 if i == end:
1373 1373 return True
1374 1374 elif i > end:
1375 1375 break
1376 1376 return False
1377 1377
1378 1378 def commonancestorsheads(self, a, b):
1379 1379 """calculate all the heads of the common ancestors of nodes a and b"""
1380 1380 a, b = self.rev(a), self.rev(b)
1381 1381 try:
1382 1382 ancs = self.index.commonancestorsheads(a, b)
1383 1383 except (AttributeError, OverflowError): # C implementation failed
1384 1384 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1385 1385 return pycompat.maplist(self.node, ancs)
1386 1386
1387 1387 def isancestor(self, a, b):
1388 1388 """return True if node a is an ancestor of node b
1389 1389
1390 1390 The implementation of this is trivial but the use of
1391 1391 commonancestorsheads is not."""
1392 1392 return a in self.commonancestorsheads(a, b)
1393 1393
1394 1394 def ancestor(self, a, b):
1395 1395 """calculate the "best" common ancestor of nodes a and b"""
1396 1396
1397 1397 a, b = self.rev(a), self.rev(b)
1398 1398 try:
1399 1399 ancs = self.index.ancestors(a, b)
1400 1400 except (AttributeError, OverflowError):
1401 1401 ancs = ancestor.ancestors(self.parentrevs, a, b)
1402 1402 if ancs:
1403 1403 # choose a consistent winner when there's a tie
1404 1404 return min(map(self.node, ancs))
1405 1405 return nullid
1406 1406
1407 1407 def _match(self, id):
1408 1408 if isinstance(id, int):
1409 1409 # rev
1410 1410 return self.node(id)
1411 1411 if len(id) == 20:
1412 1412 # possibly a binary node
1413 1413 # odds of a binary node being all hex in ASCII are 1 in 10**25
1414 1414 try:
1415 1415 node = id
1416 1416 self.rev(node) # quick search the index
1417 1417 return node
1418 1418 except LookupError:
1419 1419 pass # may be partial hex id
1420 1420 try:
1421 1421 # str(rev)
1422 1422 rev = int(id)
1423 1423 if "%d" % rev != id:
1424 1424 raise ValueError
1425 1425 if rev < 0:
1426 1426 rev = len(self) + rev
1427 1427 if rev < 0 or rev >= len(self):
1428 1428 raise ValueError
1429 1429 return self.node(rev)
1430 1430 except (ValueError, OverflowError):
1431 1431 pass
1432 1432 if len(id) == 40:
1433 1433 try:
1434 1434 # a full hex nodeid?
1435 1435 node = bin(id)
1436 1436 self.rev(node)
1437 1437 return node
1438 1438 except (TypeError, LookupError):
1439 1439 pass
1440 1440
1441 1441 def _partialmatch(self, id):
1442 1442 # we don't care about wdirfilenodeids as they should always be full hashes
1443 1443 maybewdir = wdirhex.startswith(id)
1444 1444 try:
1445 1445 partial = self.index.partialmatch(id)
1446 1446 if partial and self.hasnode(partial):
1447 1447 if maybewdir:
1448 1448 # single 'ff...' match in radix tree, ambiguous with wdir
1449 1449 raise RevlogError
1450 1450 return partial
1451 1451 if maybewdir:
1452 1452 # no 'ff...' match in radix tree, wdir identified
1453 1453 raise error.WdirUnsupported
1454 1454 return None
1455 1455 except RevlogError:
1456 1456 # parsers.c radix tree lookup gave multiple matches
1457 1457 # fast path: for unfiltered changelog, radix tree is accurate
1458 1458 if not getattr(self, 'filteredrevs', None):
1459 1459 raise LookupError(id, self.indexfile,
1460 1460 _('ambiguous identifier'))
1461 1461 # fall through to slow path that filters hidden revisions
1462 1462 except (AttributeError, ValueError):
1463 1463 # we are pure python, or key was too short to search radix tree
1464 1464 pass
1465 1465
1466 1466 if id in self._pcache:
1467 1467 return self._pcache[id]
1468 1468
1469 1469 if len(id) < 40:
1470 1470 try:
1471 1471 # hex(node)[:...]
1472 1472 l = len(id) // 2 # grab an even number of digits
1473 1473 prefix = bin(id[:l * 2])
1474 1474 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1475 1475 nl = [n for n in nl if hex(n).startswith(id) and
1476 1476 self.hasnode(n)]
1477 1477 if len(nl) > 0:
1478 1478 if len(nl) == 1 and not maybewdir:
1479 1479 self._pcache[id] = nl[0]
1480 1480 return nl[0]
1481 1481 raise LookupError(id, self.indexfile,
1482 1482 _('ambiguous identifier'))
1483 1483 if maybewdir:
1484 1484 raise error.WdirUnsupported
1485 1485 return None
1486 1486 except TypeError:
1487 1487 pass
1488 1488
1489 1489 def lookup(self, id):
1490 1490 """locate a node based on:
1491 1491 - revision number or str(revision number)
1492 1492 - nodeid or subset of hex nodeid
1493 1493 """
1494 1494 n = self._match(id)
1495 1495 if n is not None:
1496 1496 return n
1497 1497 n = self._partialmatch(id)
1498 1498 if n:
1499 1499 return n
1500 1500
1501 1501 raise LookupError(id, self.indexfile, _('no match found'))
1502 1502
1503 def shortest(self, hexnode, minlength=1):
1504 """Find the shortest unambiguous prefix that matches hexnode."""
1503 def shortest(self, node, minlength=1):
1504 """Find the shortest unambiguous prefix that matches node."""
1505 1505 def isvalid(test):
1506 1506 try:
1507 1507 if self._partialmatch(test) is None:
1508 1508 return False
1509 1509
1510 1510 try:
1511 1511 i = int(test)
1512 1512 # if we are a pure int, then starting with zero will not be
1513 1513 # confused as a rev; or, obviously, if the int is larger
1514 1514 # than the value of the tip rev
1515 1515 if test[0] == '0' or i > len(self):
1516 1516 return True
1517 1517 return False
1518 1518 except ValueError:
1519 1519 return True
1520 1520 except error.RevlogError:
1521 1521 return False
1522 1522 except error.WdirUnsupported:
1523 1523 # single 'ff...' match
1524 1524 return True
1525 1525
1526 hexnode = hex(node)
1526 1527 shortest = hexnode
1527 1528 startlength = max(6, minlength)
1528 1529 length = startlength
1529 1530 while True:
1530 1531 test = hexnode[:length]
1531 1532 if isvalid(test):
1532 1533 shortest = test
1533 1534 if length == minlength or length > startlength:
1534 1535 return shortest
1535 1536 length -= 1
1536 1537 else:
1537 1538 length += 1
1538 1539 if len(shortest) <= length:
1539 1540 return shortest
1540 1541
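# With this change, callers hand shortest() the binary nodeid and the
# hexlification happens internally. Illustrative call sites (the names
# "rl" and "ctx" are assumptions, not from this file):
#   before: rl.shortest(hex(ctx.node()), minlength)
#   after:  rl.shortest(ctx.node(), minlength)
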
1541 1542 def cmp(self, node, text):
1542 1543 """compare text with a given file revision
1543 1544
1544 1545 returns True if text is different than what is stored.
1545 1546 """
1546 1547 p1, p2 = self.parents(node)
1547 1548 return hash(text, p1, p2) != node
1548 1549
1549 1550 def _cachesegment(self, offset, data):
1550 1551 """Add a segment to the revlog cache.
1551 1552
1552 1553 Accepts an absolute offset and the data that is at that location.
1553 1554 """
1554 1555 o, d = self._chunkcache
1555 1556 # try to add to existing cache
1556 1557 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1557 1558 self._chunkcache = o, d + data
1558 1559 else:
1559 1560 self._chunkcache = offset, data
1560 1561
1561 1562 def _readsegment(self, offset, length, df=None):
1562 1563 """Load a segment of raw data from the revlog.
1563 1564
1564 1565 Accepts an absolute offset, length to read, and an optional existing
1565 1566 file handle to read from.
1566 1567
1567 1568 If an existing file handle is passed, it will be seeked and the
1568 1569 original seek position will NOT be restored.
1569 1570
1570 1571 Returns a str or buffer of raw byte data.
1571 1572 """
1572 1573 # Cache data both forward and backward around the requested
1573 1574 # data, in a fixed size window. This helps speed up operations
1574 1575 # involving reading the revlog backwards.
1575 1576 cachesize = self._chunkcachesize
1576 1577 realoffset = offset & ~(cachesize - 1)
1577 1578 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1578 1579 - realoffset)
1579 1580 with self._datareadfp(df) as df:
1580 1581 df.seek(realoffset)
1581 1582 d = df.read(reallength)
1582 1583 self._cachesegment(realoffset, d)
1583 1584 if offset != realoffset or reallength != length:
1584 1585 return util.buffer(d, offset - realoffset, length)
1585 1586 return d
1586 1587
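The window alignment performed by _readsegment() above, as a small worked example with an illustrative power-of-two cache size.

    cachesize = 65536
    offset, length = 65000, 1000          # request straddles a block boundary
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    assert (realoffset, reallength) == (0, 131072)
    # _readsegment() then returns a util.buffer slice of the cached window
    # whenever (realoffset, reallength) differ from the requested range.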
1587 1588 def _getsegment(self, offset, length, df=None):
1588 1589 """Obtain a segment of raw data from the revlog.
1589 1590
1590 1591 Accepts an absolute offset, length of bytes to obtain, and an
1591 1592 optional file handle to the already-opened revlog. If the file
1592 1593 handle is used, its original seek position will not be preserved.
1593 1594
1594 1595 Requests for data may be returned from a cache.
1595 1596
1596 1597 Returns a str or a buffer instance of raw byte data.
1597 1598 """
1598 1599 o, d = self._chunkcache
1599 1600 l = len(d)
1600 1601
1601 1602 # is it in the cache?
1602 1603 cachestart = offset - o
1603 1604 cacheend = cachestart + length
1604 1605 if cachestart >= 0 and cacheend <= l:
1605 1606 if cachestart == 0 and cacheend == l:
1606 1607 return d # avoid a copy
1607 1608 return util.buffer(d, cachestart, cacheend - cachestart)
1608 1609
1609 1610 return self._readsegment(offset, length, df=df)
1610 1611
1611 1612 def _getsegmentforrevs(self, startrev, endrev, df=None):
1612 1613 """Obtain a segment of raw data corresponding to a range of revisions.
1613 1614
1614 1615 Accepts the start and end revisions and an optional already-open
1615 1616 file handle to be used for reading. If the file handle is read, its
1616 1617 seek position will not be preserved.
1617 1618
1618 1619 Requests for data may be satisfied by a cache.
1619 1620
1620 1621 Returns a 2-tuple of (offset, data) for the requested range of
1621 1622 revisions. Offset is the integer offset from the beginning of the
1622 1623 revlog and data is a str or buffer of the raw byte data.
1623 1624
1624 1625 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1625 1626 to determine where each revision's data begins and ends.
1626 1627 """
1627 1628 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1628 1629 # (functions are expensive).
1629 1630 index = self.index
1630 1631 istart = index[startrev]
1631 1632 start = int(istart[0] >> 16)
1632 1633 if startrev == endrev:
1633 1634 end = start + istart[1]
1634 1635 else:
1635 1636 iend = index[endrev]
1636 1637 end = int(iend[0] >> 16) + iend[1]
1637 1638
1638 1639 if self._inline:
1639 1640 start += (startrev + 1) * self._io.size
1640 1641 end += (endrev + 1) * self._io.size
1641 1642 length = end - start
1642 1643
1643 1644 return start, self._getsegment(start, length, df=df)
1644 1645
1645 1646 def _chunk(self, rev, df=None):
1646 1647 """Obtain a single decompressed chunk for a revision.
1647 1648
1648 1649 Accepts an integer revision and an optional already-open file handle
1649 1650 to be used for reading. If used, the seek position of the file will not
1650 1651 be preserved.
1651 1652
1652 1653 Returns a str holding uncompressed data for the requested revision.
1653 1654 """
1654 1655 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1655 1656
1656 1657 def _chunks(self, revs, df=None):
1657 1658 """Obtain decompressed chunks for the specified revisions.
1658 1659
1659 1660 Accepts an iterable of numeric revisions that are assumed to be in
1660 1661 ascending order. Also accepts an optional already-open file handle
1661 1662 to be used for reading. If used, the seek position of the file will
1662 1663 not be preserved.
1663 1664
1664 1665 This function is similar to calling ``self._chunk()`` multiple times,
1665 1666 but is faster.
1666 1667
1667 1668 Returns a list with decompressed data for each requested revision.
1668 1669 """
1669 1670 if not revs:
1670 1671 return []
1671 1672 start = self.start
1672 1673 length = self.length
1673 1674 inline = self._inline
1674 1675 iosize = self._io.size
1675 1676 buffer = util.buffer
1676 1677
1677 1678 l = []
1678 1679 ladd = l.append
1679 1680
1680 1681 if not self._withsparseread:
1681 1682 slicedchunks = (revs,)
1682 1683 else:
1683 1684 slicedchunks = _slicechunk(self, revs)
1684 1685
1685 1686 for revschunk in slicedchunks:
1686 1687 firstrev = revschunk[0]
1687 1688 # Skip trailing revisions with empty diff
1688 1689 for lastrev in revschunk[::-1]:
1689 1690 if length(lastrev) != 0:
1690 1691 break
1691 1692
1692 1693 try:
1693 1694 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1694 1695 except OverflowError:
1695 1696 # issue4215 - we can't cache a run of chunks greater than
1696 1697 # 2G on Windows
1697 1698 return [self._chunk(rev, df=df) for rev in revschunk]
1698 1699
1699 1700 decomp = self.decompress
1700 1701 for rev in revschunk:
1701 1702 chunkstart = start(rev)
1702 1703 if inline:
1703 1704 chunkstart += (rev + 1) * iosize
1704 1705 chunklength = length(rev)
1705 1706 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1706 1707
1707 1708 return l
1708 1709
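An illustrative sketch (not revlog API) of the batching idea behind _chunks(): read one contiguous segment covering several revisions, then slice and decompress each revision's chunk out of that single buffer.

    def chunks_from_segment(segment, segmentoffset, revranges, decompress):
        # revranges: (chunkstart, chunklength) pairs in absolute positions
        out = []
        for chunkstart, chunklength in revranges:
            rel = chunkstart - segmentoffset
            out.append(decompress(segment[rel:rel + chunklength]))
        return out

    seg = b'aaabbbbcc'
    assert chunks_from_segment(seg, 100, [(100, 3), (103, 4), (107, 2)],
                               lambda d: d) == [b'aaa', b'bbbb', b'cc']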
1709 1710 def _chunkclear(self):
1710 1711 """Clear the raw chunk cache."""
1711 1712 self._chunkcache = (0, '')
1712 1713
1713 1714 def deltaparent(self, rev):
1714 1715 """return deltaparent of the given revision"""
1715 1716 base = self.index[rev][3]
1716 1717 if base == rev:
1717 1718 return nullrev
1718 1719 elif self._generaldelta:
1719 1720 return base
1720 1721 else:
1721 1722 return rev - 1
1722 1723
1723 1724 def revdiff(self, rev1, rev2):
1724 1725 """return or calculate a delta between two revisions
1725 1726
1726 1727 The delta calculated is in binary form and is intended to be written to
1727 1728 revlog data directly. So this function needs raw revision data.
1728 1729 """
1729 1730 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1730 1731 return bytes(self._chunk(rev2))
1731 1732
1732 1733 return mdiff.textdiff(self.revision(rev1, raw=True),
1733 1734 self.revision(rev2, raw=True))
1734 1735
1735 1736 def revision(self, nodeorrev, _df=None, raw=False):
1736 1737 """return an uncompressed revision of a given node or revision
1737 1738 number.
1738 1739
1739 1740 _df - an existing file handle to read from. (internal-only)
1740 1741 raw - an optional argument specifying if the revision data is to be
1741 1742 treated as raw data when applying flag transforms. 'raw' should be set
1742 1743 to True when generating changegroups or in debug commands.
1743 1744 """
1744 1745 if isinstance(nodeorrev, int):
1745 1746 rev = nodeorrev
1746 1747 node = self.node(rev)
1747 1748 else:
1748 1749 node = nodeorrev
1749 1750 rev = None
1750 1751
1751 1752 cachedrev = None
1752 1753 flags = None
1753 1754 rawtext = None
1754 1755 if node == nullid:
1755 1756 return ""
1756 1757 if self._cache:
1757 1758 if self._cache[0] == node:
1758 1759 # _cache only stores rawtext
1759 1760 if raw:
1760 1761 return self._cache[2]
1761 1762 # duplicated, but good for perf
1762 1763 if rev is None:
1763 1764 rev = self.rev(node)
1764 1765 if flags is None:
1765 1766 flags = self.flags(rev)
1766 1767 # no extra flags set, no flag processor runs, text = rawtext
1767 1768 if flags == REVIDX_DEFAULT_FLAGS:
1768 1769 return self._cache[2]
1769 1770 # rawtext is reusable. need to run flag processor
1770 1771 rawtext = self._cache[2]
1771 1772
1772 1773 cachedrev = self._cache[1]
1773 1774
1774 1775 # look up what we need to read
1775 1776 if rawtext is None:
1776 1777 if rev is None:
1777 1778 rev = self.rev(node)
1778 1779
1779 1780 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1780 1781 if stopped:
1781 1782 rawtext = self._cache[2]
1782 1783
1783 1784 # drop cache to save memory
1784 1785 self._cache = None
1785 1786
1786 1787 bins = self._chunks(chain, df=_df)
1787 1788 if rawtext is None:
1788 1789 rawtext = bytes(bins[0])
1789 1790 bins = bins[1:]
1790 1791
1791 1792 rawtext = mdiff.patches(rawtext, bins)
1792 1793 self._cache = (node, rev, rawtext)
1793 1794
1794 1795 if flags is None:
1795 1796 if rev is None:
1796 1797 rev = self.rev(node)
1797 1798 flags = self.flags(rev)
1798 1799
1799 1800 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1800 1801 if validatehash:
1801 1802 self.checkhash(text, node, rev=rev)
1802 1803
1803 1804 return text
1804 1805
1805 1806 def hash(self, text, p1, p2):
1806 1807 """Compute a node hash.
1807 1808
1808 1809 Available as a function so that subclasses can replace the hash
1809 1810 as needed.
1810 1811 """
1811 1812 return hash(text, p1, p2)
1812 1813
1813 1814 def _processflags(self, text, flags, operation, raw=False):
1814 1815 """Inspect revision data flags and applies transforms defined by
1815 1816 registered flag processors.
1816 1817
1817 1818 ``text`` - the revision data to process
1818 1819 ``flags`` - the revision flags
1819 1820 ``operation`` - the operation being performed (read or write)
1820 1821 ``raw`` - an optional argument describing if the raw transform should be
1821 1822 applied.
1822 1823
1823 1824 This method processes the flags in the order (or reverse order if
1824 1825 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1825 1826 flag processors registered for present flags. The order of flags defined
1826 1827 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1827 1828
1828 1829 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1829 1830 processed text and ``validatehash`` is a bool indicating whether the
1830 1831 returned text should be checked for hash integrity.
1831 1832
1832 1833 Note: If the ``raw`` argument is set, it has precedence over the
1833 1834 operation and will only update the value of ``validatehash``.
1834 1835 """
1835 1836 # fast path: no flag processors will run
1836 1837 if flags == 0:
1837 1838 return text, True
1838 1839 if not operation in ('read', 'write'):
1839 1840 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1840 1841 # Check all flags are known.
1841 1842 if flags & ~REVIDX_KNOWN_FLAGS:
1842 1843 raise RevlogError(_("incompatible revision flag '%#x'") %
1843 1844 (flags & ~REVIDX_KNOWN_FLAGS))
1844 1845 validatehash = True
1845 1846 # Depending on the operation (read or write), the order might be
1846 1847 # reversed due to non-commutative transforms.
1847 1848 orderedflags = REVIDX_FLAGS_ORDER
1848 1849 if operation == 'write':
1849 1850 orderedflags = reversed(orderedflags)
1850 1851
1851 1852 for flag in orderedflags:
1852 1853 # If a flagprocessor has been registered for a known flag, apply the
1853 1854 # related operation transform and update result tuple.
1854 1855 if flag & flags:
1855 1856 vhash = True
1856 1857
1857 1858 if flag not in _flagprocessors:
1858 1859 message = _("missing processor for flag '%#x'") % (flag)
1859 1860 raise RevlogError(message)
1860 1861
1861 1862 processor = _flagprocessors[flag]
1862 1863 if processor is not None:
1863 1864 readtransform, writetransform, rawtransform = processor
1864 1865
1865 1866 if raw:
1866 1867 vhash = rawtransform(self, text)
1867 1868 elif operation == 'read':
1868 1869 text, vhash = readtransform(self, text)
1869 1870 else: # write operation
1870 1871 text, vhash = writetransform(self, text)
1871 1872 validatehash = validatehash and vhash
1872 1873
1873 1874 return text, validatehash
1874 1875
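A hedged sketch of the (read, write, raw) processor shape consumed by _processflags() above. The flag bit and transforms are purely illustrative, not part of revlog's registered processors.

    EXAMPLEFLAG = 1 << 12                      # assumed free bit, for illustration

    def _exampleread(rl, text):
        return text.upper(), False             # transformed text, skip hash check

    def _examplewrite(rl, text):
        return text.lower(), False

    def _exampleraw(rl, text):
        return False                           # rawtext hash is not meaningful here

    exampleprocessor = (_exampleread, _examplewrite, _exampleraw)
    # registration would go through addflagprocessor(EXAMPLEFLAG, exampleprocessor),
    # after which _processflags() applies the read/write transform per flag bit.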
1875 1876 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1876 1877 """Check node hash integrity.
1877 1878
1878 1879 Available as a function so that subclasses can extend hash mismatch
1879 1880 behaviors as needed.
1880 1881 """
1881 1882 try:
1882 1883 if p1 is None and p2 is None:
1883 1884 p1, p2 = self.parents(node)
1884 1885 if node != self.hash(text, p1, p2):
1885 1886 revornode = rev
1886 1887 if revornode is None:
1887 1888 revornode = templatefilters.short(hex(node))
1888 1889 raise RevlogError(_("integrity check failed on %s:%s")
1889 1890 % (self.indexfile, pycompat.bytestr(revornode)))
1890 1891 except RevlogError:
1891 1892 if self._censorable and _censoredtext(text):
1892 1893 raise error.CensoredNodeError(self.indexfile, node, text)
1893 1894 raise
1894 1895
1895 1896 def _enforceinlinesize(self, tr, fp=None):
1896 1897 """Check if the revlog is too big for inline and convert if so.
1897 1898
1898 1899 This should be called after revisions are added to the revlog. If the
1899 1900 revlog has grown too large to be an inline revlog, it will convert it
1900 1901 to use multiple index and data files.
1901 1902 """
1902 1903 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1903 1904 return
1904 1905
1905 1906 trinfo = tr.find(self.indexfile)
1906 1907 if trinfo is None:
1907 1908 raise RevlogError(_("%s not found in the transaction")
1908 1909 % self.indexfile)
1909 1910
1910 1911 trindex = trinfo[2]
1911 1912 if trindex is not None:
1912 1913 dataoff = self.start(trindex)
1913 1914 else:
1914 1915 # revlog was stripped at start of transaction, use all leftover data
1915 1916 trindex = len(self) - 1
1916 1917 dataoff = self.end(-2)
1917 1918
1918 1919 tr.add(self.datafile, dataoff)
1919 1920
1920 1921 if fp:
1921 1922 fp.flush()
1922 1923 fp.close()
1923 1924
1924 1925 with self._datafp('w') as df:
1925 1926 for r in self:
1926 1927 df.write(self._getsegmentforrevs(r, r)[1])
1927 1928
1928 1929 with self._indexfp('w') as fp:
1929 1930 self.version &= ~FLAG_INLINE_DATA
1930 1931 self._inline = False
1931 1932 io = self._io
1932 1933 for i in self:
1933 1934 e = io.packentry(self.index[i], self.node, self.version, i)
1934 1935 fp.write(e)
1935 1936
1936 1937 # the temp file replaces the real index when we exit the context
1937 1938 # manager
1938 1939
1939 1940 tr.replace(self.indexfile, trindex * self._io.size)
1940 1941 self._chunkclear()
1941 1942
1942 1943 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1943 1944 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1944 1945 """add a revision to the log
1945 1946
1946 1947 text - the revision data to add
1947 1948 transaction - the transaction object used for rollback
1948 1949 link - the linkrev data to add
1949 1950 p1, p2 - the parent nodeids of the revision
1950 1951 cachedelta - an optional precomputed delta
1951 1952 node - nodeid of revision; typically node is not specified, and it is
1952 1953 computed by default as hash(text, p1, p2), however subclasses might
1953 1954 use different hashing method (and override checkhash() in such case)
1954 1955 flags - the known flags to set on the revision
1955 1956 deltacomputer - an optional _deltacomputer instance shared between
1956 1957 multiple calls
1957 1958 """
1958 1959 if link == nullrev:
1959 1960 raise RevlogError(_("attempted to add linkrev -1 to %s")
1960 1961 % self.indexfile)
1961 1962
1962 1963 if flags:
1963 1964 node = node or self.hash(text, p1, p2)
1964 1965
1965 1966 rawtext, validatehash = self._processflags(text, flags, 'write')
1966 1967
1967 1968 # If the flag processor modifies the revision data, ignore any provided
1968 1969 # cachedelta.
1969 1970 if rawtext != text:
1970 1971 cachedelta = None
1971 1972
1972 1973 if len(rawtext) > _maxentrysize:
1973 1974 raise RevlogError(
1974 1975 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1975 1976 % (self.indexfile, len(rawtext)))
1976 1977
1977 1978 node = node or self.hash(rawtext, p1, p2)
1978 1979 if node in self.nodemap:
1979 1980 return node
1980 1981
1981 1982 if validatehash:
1982 1983 self.checkhash(rawtext, node, p1=p1, p2=p2)
1983 1984
1984 1985 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1985 1986 flags, cachedelta=cachedelta,
1986 1987 deltacomputer=deltacomputer)
1987 1988
1988 1989 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1989 1990 cachedelta=None, deltacomputer=None):
1990 1991 """add a raw revision with known flags, node and parents
1991 1992 useful when reusing a revision not stored in this revlog (ex: received
1992 1993 over wire, or read from an external bundle).
1993 1994 """
1994 1995 dfh = None
1995 1996 if not self._inline:
1996 1997 dfh = self._datafp("a+")
1997 1998 ifh = self._indexfp("a+")
1998 1999 try:
1999 2000 return self._addrevision(node, rawtext, transaction, link, p1, p2,
2000 2001 flags, cachedelta, ifh, dfh,
2001 2002 deltacomputer=deltacomputer)
2002 2003 finally:
2003 2004 if dfh:
2004 2005 dfh.close()
2005 2006 ifh.close()
2006 2007
2007 2008 def compress(self, data):
2008 2009 """Generate a possibly-compressed representation of data."""
2009 2010 if not data:
2010 2011 return '', data
2011 2012
2012 2013 compressed = self._compressor.compress(data)
2013 2014
2014 2015 if compressed:
2015 2016 # The revlog compressor added the header in the returned data.
2016 2017 return '', compressed
2017 2018
2018 2019 if data[0:1] == '\0':
2019 2020 return '', data
2020 2021 return 'u', data
2021 2022
2022 2023 def decompress(self, data):
2023 2024 """Decompress a revlog chunk.
2024 2025
2025 2026 The chunk is expected to begin with a header identifying the
2026 2027 format type so it can be routed to an appropriate decompressor.
2027 2028 """
2028 2029 if not data:
2029 2030 return data
2030 2031
2031 2032 # Revlogs are read much more frequently than they are written and many
2032 2033 # chunks only take microseconds to decompress, so performance is
2033 2034 # important here.
2034 2035 #
2035 2036 # We can make a few assumptions about revlogs:
2036 2037 #
2037 2038 # 1) the majority of chunks will be compressed (as opposed to inline
2038 2039 # raw data).
2039 2040 # 2) decompressing *any* data will likely be at least 10x slower than
2040 2041 # returning raw inline data.
2041 2042 # 3) we want to prioritize common and officially supported compression
2042 2043 # engines
2043 2044 #
2044 2045 # It follows that we want to optimize for "decompress compressed data
2045 2046 # when encoded with common and officially supported compression engines"
2046 2047 # case over "raw data" and "data encoded by less common or non-official
2047 2048 # compression engines." That is why we have the inline lookup first
2048 2049 # followed by the compengines lookup.
2049 2050 #
2050 2051 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2051 2052 # compressed chunks. And this matters for changelog and manifest reads.
2052 2053 t = data[0:1]
2053 2054
2054 2055 if t == 'x':
2055 2056 try:
2056 2057 return _zlibdecompress(data)
2057 2058 except zlib.error as e:
2058 2059 raise RevlogError(_('revlog decompress error: %s') %
2059 2060 stringutil.forcebytestr(e))
2060 2061 # '\0' is more common than 'u' so it goes first.
2061 2062 elif t == '\0':
2062 2063 return data
2063 2064 elif t == 'u':
2064 2065 return util.buffer(data, 1)
2065 2066
2066 2067 try:
2067 2068 compressor = self._decompressors[t]
2068 2069 except KeyError:
2069 2070 try:
2070 2071 engine = util.compengines.forrevlogheader(t)
2071 2072 compressor = engine.revlogcompressor()
2072 2073 self._decompressors[t] = compressor
2073 2074 except KeyError:
2074 2075 raise RevlogError(_('unknown compression type %r') % t)
2075 2076
2076 2077 return compressor.decompress(data)
2077 2078
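A toy round-trip showing the one-byte header convention that compress() and decompress() above rely on ('x' for zlib output, 'u' for stored-uncompressed, '\0' or empty for literal data). This sketch handles zlib only; real revlogs may route other headers to pluggable engines.

    import zlib

    def toycompress(data):
        comp = zlib.compress(data)
        if comp and len(comp) < len(data):
            return comp                        # zlib output already starts with 'x'
        return b'u' + data                     # stored literally, flagged with 'u'

    def toydecompress(blob):
        t = blob[0:1]
        if t == b'x':
            return zlib.decompress(blob)
        if t == b'u':
            return blob[1:]
        if not blob or t == b'\0':
            return blob                        # empty or '\0'-prefixed data is literal
        raise ValueError('unknown compression header %r' % t)

    payload = b'revlog chunk ' * 20
    assert toydecompress(toycompress(payload)) == payload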
2078 2079 def _isgooddeltainfo(self, d, textlen):
2079 2080 """Returns True if the given delta is good. Good means that it is within
2080 2081 the disk span, disk size, and chain length bounds that we know to be
2081 2082 performant."""
2082 2083 if d is None:
2083 2084 return False
2084 2085
2085 2086 # - 'd.distance' is the distance from the base revision -- bounding it
2086 2087 # limits the amount of I/O we need to do.
2087 2088 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2088 2089 # need to apply -- bounding it limits the amount of CPU we consume.
2089 2090
2090 2091 defaultmax = textlen * 4
2091 2092 maxdist = self._maxdeltachainspan
2092 2093 if not maxdist:
2093 2094 maxdist = d.distance # ensure the conditional pass
2094 2095 maxdist = max(maxdist, defaultmax)
2095 2096 if (d.distance > maxdist or d.deltalen > textlen or
2096 2097 d.compresseddeltalen > textlen * 2 or
2097 2098 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2098 2099 return False
2099 2100
2100 2101 return True
2101 2102
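The acceptance bounds applied by _isgooddeltainfo() above, written out with illustrative numbers (no revlog objects involved).

    textlen = 1000
    maxchainlen = 1000                 # assumed config value
    maxdeltachainspan = 0              # falsy means "no explicit span limit"

    distance, deltalen, compresseddeltalen, chainlen = 3500, 400, 1800, 12

    maxdist = maxdeltachainspan or distance    # no span limit configured
    maxdist = max(maxdist, textlen * 4)        # defaultmax = 4000
    good = not (distance > maxdist or
                deltalen > textlen or
                compresseddeltalen > textlen * 2 or
                (maxchainlen and chainlen > maxchainlen))
    assert good    # 3500 <= 4000, 400 <= 1000, 1800 <= 2000, 12 <= 1000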
2102 2103 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2103 2104 cachedelta, ifh, dfh, alwayscache=False,
2104 2105 deltacomputer=None):
2105 2106 """internal function to add revisions to the log
2106 2107
2107 2108 see addrevision for argument descriptions.
2108 2109
2109 2110 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2110 2111
2111 2112 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2112 2113 be used.
2113 2114
2114 2115 invariants:
2115 2116 - rawtext is optional (can be None); if not set, cachedelta must be set.
2116 2117 if both are set, they must correspond to each other.
2117 2118 """
2118 2119 if node == nullid:
2119 2120 raise RevlogError(_("%s: attempt to add null revision") %
2120 2121 (self.indexfile))
2121 2122 if node == wdirid or node in wdirfilenodeids:
2122 2123 raise RevlogError(_("%s: attempt to add wdir revision") %
2123 2124 (self.indexfile))
2124 2125
2125 2126 if self._inline:
2126 2127 fh = ifh
2127 2128 else:
2128 2129 fh = dfh
2129 2130
2130 2131 btext = [rawtext]
2131 2132
2132 2133 curr = len(self)
2133 2134 prev = curr - 1
2134 2135 offset = self.end(prev)
2135 2136 p1r, p2r = self.rev(p1), self.rev(p2)
2136 2137
2137 2138 # full versions are inserted when the needed deltas
2138 2139 # become comparable to the uncompressed text
2139 2140 if rawtext is None:
2140 2141 # need rawtext size, before changed by flag processors, which is
2141 2142 # the non-raw size. use revlog explicitly to avoid filelog's extra
2142 2143 # logic that might remove metadata size.
2143 2144 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2144 2145 cachedelta[1])
2145 2146 else:
2146 2147 textlen = len(rawtext)
2147 2148
2148 2149 if deltacomputer is None:
2149 2150 deltacomputer = _deltacomputer(self)
2150 2151
2151 2152 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2152 2153
2153 2154 # no delta for flag processor revision (see "candelta" for why)
2154 2155 # not calling candelta since only one revision needs test, also to
2155 2156 # avoid overhead fetching flags again.
2156 2157 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2157 2158 deltainfo = None
2158 2159 else:
2159 2160 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2160 2161
2161 2162 if deltainfo is not None:
2162 2163 base = deltainfo.base
2163 2164 chainbase = deltainfo.chainbase
2164 2165 data = deltainfo.data
2165 2166 l = deltainfo.deltalen
2166 2167 else:
2167 2168 rawtext = deltacomputer.buildtext(revinfo, fh)
2168 2169 data = self.compress(rawtext)
2169 2170 l = len(data[1]) + len(data[0])
2170 2171 base = chainbase = curr
2171 2172
2172 2173 e = (offset_type(offset, flags), l, textlen,
2173 2174 base, link, p1r, p2r, node)
2174 2175 self.index.insert(-1, e)
2175 2176 self.nodemap[node] = curr
2176 2177
2177 2178 entry = self._io.packentry(e, self.node, self.version, curr)
2178 2179 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2179 2180
2180 2181 if alwayscache and rawtext is None:
2181 2182 rawtext = deltacomputer._buildtext(revinfo, fh)
2182 2183
2183 2184 if type(rawtext) == bytes: # only accept immutable objects
2184 2185 self._cache = (node, curr, rawtext)
2185 2186 self._chainbasecache[curr] = chainbase
2186 2187 return node
2187 2188
2188 2189 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2189 2190 # Files opened in a+ mode have inconsistent behavior on various
2190 2191 # platforms. Windows requires that a file positioning call be made
2191 2192 # when the file handle transitions between reads and writes. See
2192 2193 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2193 2194 # platforms, Python or the platform itself can be buggy. Some versions
2194 2195 # of Solaris have been observed to not append at the end of the file
2195 2196 # if the file was seeked to before the end. See issue4943 for more.
2196 2197 #
2197 2198 # We work around this issue by inserting a seek() before writing.
2198 2199 # Note: This is likely not necessary on Python 3.
2199 2200 ifh.seek(0, os.SEEK_END)
2200 2201 if dfh:
2201 2202 dfh.seek(0, os.SEEK_END)
2202 2203
2203 2204 curr = len(self) - 1
2204 2205 if not self._inline:
2205 2206 transaction.add(self.datafile, offset)
2206 2207 transaction.add(self.indexfile, curr * len(entry))
2207 2208 if data[0]:
2208 2209 dfh.write(data[0])
2209 2210 dfh.write(data[1])
2210 2211 ifh.write(entry)
2211 2212 else:
2212 2213 offset += curr * self._io.size
2213 2214 transaction.add(self.indexfile, offset, curr)
2214 2215 ifh.write(entry)
2215 2216 ifh.write(data[0])
2216 2217 ifh.write(data[1])
2217 2218 self._enforceinlinesize(transaction, ifh)
2218 2219
2219 2220 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2220 2221 """
2221 2222 add a delta group
2222 2223
2223 2224 given a set of deltas, add them to the revision log. the
2224 2225 first delta is against its parent, which should be in our
2225 2226 log, the rest are against the previous delta.
2226 2227
2227 2228 If ``addrevisioncb`` is defined, it will be called with arguments of
2228 2229 this revlog and the node that was added.
2229 2230 """
2230 2231
2231 2232 nodes = []
2232 2233
2233 2234 r = len(self)
2234 2235 end = 0
2235 2236 if r:
2236 2237 end = self.end(r - 1)
2237 2238 ifh = self._indexfp("a+")
2238 2239 isize = r * self._io.size
2239 2240 if self._inline:
2240 2241 transaction.add(self.indexfile, end + isize, r)
2241 2242 dfh = None
2242 2243 else:
2243 2244 transaction.add(self.indexfile, isize, r)
2244 2245 transaction.add(self.datafile, end)
2245 2246 dfh = self._datafp("a+")
2246 2247 def flush():
2247 2248 if dfh:
2248 2249 dfh.flush()
2249 2250 ifh.flush()
2250 2251 try:
2251 2252 deltacomputer = _deltacomputer(self)
2252 2253 # loop through our set of deltas
2253 2254 for data in deltas:
2254 2255 node, p1, p2, linknode, deltabase, delta, flags = data
2255 2256 link = linkmapper(linknode)
2256 2257 flags = flags or REVIDX_DEFAULT_FLAGS
2257 2258
2258 2259 nodes.append(node)
2259 2260
2260 2261 if node in self.nodemap:
2261 2262 # this can happen if two branches make the same change
2262 2263 continue
2263 2264
2264 2265 for p in (p1, p2):
2265 2266 if p not in self.nodemap:
2266 2267 raise LookupError(p, self.indexfile,
2267 2268 _('unknown parent'))
2268 2269
2269 2270 if deltabase not in self.nodemap:
2270 2271 raise LookupError(deltabase, self.indexfile,
2271 2272 _('unknown delta base'))
2272 2273
2273 2274 baserev = self.rev(deltabase)
2274 2275
2275 2276 if baserev != nullrev and self.iscensored(baserev):
2276 2277 # if base is censored, delta must be full replacement in a
2277 2278 # single patch operation
2278 2279 hlen = struct.calcsize(">lll")
2279 2280 oldlen = self.rawsize(baserev)
2280 2281 newlen = len(delta) - hlen
2281 2282 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2282 2283 raise error.CensoredBaseError(self.indexfile,
2283 2284 self.node(baserev))
2284 2285
2285 2286 if not flags and self._peek_iscensored(baserev, delta, flush):
2286 2287 flags |= REVIDX_ISCENSORED
2287 2288
2288 2289 # We assume consumers of addrevisioncb will want to retrieve
2289 2290 # the added revision, which will require a call to
2290 2291 # revision(). revision() will fast path if there is a cache
2291 2292 # hit. So, we tell _addrevision() to always cache in this case.
2292 2293 # We're only using addgroup() in the context of changegroup
2293 2294 # generation so the revision data can always be handled as raw
2294 2295 # by the flagprocessor.
2295 2296 self._addrevision(node, None, transaction, link,
2296 2297 p1, p2, flags, (baserev, delta),
2297 2298 ifh, dfh,
2298 2299 alwayscache=bool(addrevisioncb),
2299 2300 deltacomputer=deltacomputer)
2300 2301
2301 2302 if addrevisioncb:
2302 2303 addrevisioncb(self, node)
2303 2304
2304 2305 if not dfh and not self._inline:
2305 2306 # addrevision switched from inline to conventional
2306 2307 # reopen the index
2307 2308 ifh.close()
2308 2309 dfh = self._datafp("a+")
2309 2310 ifh = self._indexfp("a+")
2310 2311 finally:
2311 2312 if dfh:
2312 2313 dfh.close()
2313 2314 ifh.close()
2314 2315
2315 2316 return nodes
2316 2317
2317 2318 def iscensored(self, rev):
2318 2319 """Check if a file revision is censored."""
2319 2320 if not self._censorable:
2320 2321 return False
2321 2322
2322 2323 return self.flags(rev) & REVIDX_ISCENSORED
2323 2324
2324 2325 def _peek_iscensored(self, baserev, delta, flush):
2325 2326 """Quickly check if a delta produces a censored revision."""
2326 2327 if not self._censorable:
2327 2328 return False
2328 2329
2329 2330 # Fragile heuristic: unless new file meta keys are added alphabetically
2330 2331 # preceding "censored", all censored revisions are prefixed by
2331 2332 # "\1\ncensored:". A delta producing such a censored revision must be a
2332 2333 # full-replacement delta, so we inspect the first and only patch in the
2333 2334 # delta for this prefix.
2334 2335 hlen = struct.calcsize(">lll")
2335 2336 if len(delta) <= hlen:
2336 2337 return False
2337 2338
2338 2339 oldlen = self.rawsize(baserev)
2339 2340 newlen = len(delta) - hlen
2340 2341 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2341 2342 return False
2342 2343
2343 2344 add = "\1\ncensored:"
2344 2345 addlen = len(add)
2345 2346 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2346 2347
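A standalone version of the censored-revision peek above, assuming the full-replacement header layout struct.pack(">lll", 0, oldlen, newlen) produced by mdiff.replacediffheader().

    import struct

    def peek_iscensored(oldlen, delta):
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False
        newlen = len(delta) - hlen
        if delta[:hlen] != struct.pack(">lll", 0, oldlen, newlen):
            return False                       # not a single full-replacement patch
        marker = b"\1\ncensored:"
        return delta[hlen:hlen + len(marker)] == marker

    body = b"\1\ncensored: gone\1\nrest of metadata"
    delta = struct.pack(">lll", 0, 5, len(body)) + body
    assert peek_iscensored(5, delta)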
2347 2348 def getstrippoint(self, minlink):
2348 2349 """find the minimum rev that must be stripped to strip the linkrev
2349 2350
2350 2351 Returns a tuple containing the minimum rev and a set of all revs that
2351 2352 have linkrevs that will be broken by this strip.
2352 2353 """
2353 2354 brokenrevs = set()
2354 2355 strippoint = len(self)
2355 2356
2356 2357 heads = {}
2357 2358 futurelargelinkrevs = set()
2358 2359 for head in self.headrevs():
2359 2360 headlinkrev = self.linkrev(head)
2360 2361 heads[head] = headlinkrev
2361 2362 if headlinkrev >= minlink:
2362 2363 futurelargelinkrevs.add(headlinkrev)
2363 2364
2364 2365 # This algorithm involves walking down the rev graph, starting at the
2365 2366 # heads. Since the revs are topologically sorted according to linkrev,
2366 2367 # once all head linkrevs are below the minlink, we know there are
2367 2368 # no more revs that could have a linkrev greater than minlink.
2368 2369 # So we can stop walking.
2369 2370 while futurelargelinkrevs:
2370 2371 strippoint -= 1
2371 2372 linkrev = heads.pop(strippoint)
2372 2373
2373 2374 if linkrev < minlink:
2374 2375 brokenrevs.add(strippoint)
2375 2376 else:
2376 2377 futurelargelinkrevs.remove(linkrev)
2377 2378
2378 2379 for p in self.parentrevs(strippoint):
2379 2380 if p != nullrev:
2380 2381 plinkrev = self.linkrev(p)
2381 2382 heads[p] = plinkrev
2382 2383 if plinkrev >= minlink:
2383 2384 futurelargelinkrevs.add(plinkrev)
2384 2385
2385 2386 return strippoint, brokenrevs
2386 2387
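A toy model of the getstrippoint() walk above, using a five-revision linear history whose linkrevs equal the revs; stripping with minlink=3 should report strippoint 3 and no broken linkrevs.

    parents = {0: (-1,), 1: (0,), 2: (1,), 3: (2,), 4: (3,)}
    linkrev = {r: r for r in range(5)}

    def getstrippoint(minlink, nrevs, headrevs):
        brokenrevs, strippoint = set(), nrevs
        heads = {h: linkrev[h] for h in headrevs}
        futurelargelinkrevs = {lr for lr in heads.values() if lr >= minlink}
        while futurelargelinkrevs:
            strippoint -= 1
            lr = heads.pop(strippoint)
            if lr < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(lr)
            for p in parents[strippoint]:
                if p != -1:
                    heads[p] = linkrev[p]
                    if linkrev[p] >= minlink:
                        futurelargelinkrevs.add(linkrev[p])
        return strippoint, brokenrevs

    assert getstrippoint(3, 5, [4]) == (3, set())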
2387 2388 def strip(self, minlink, transaction):
2388 2389 """truncate the revlog on the first revision with a linkrev >= minlink
2389 2390
2390 2391 This function is called when we're stripping revision minlink and
2391 2392 its descendants from the repository.
2392 2393
2393 2394 We have to remove all revisions with linkrev >= minlink, because
2394 2395 the equivalent changelog revisions will be renumbered after the
2395 2396 strip.
2396 2397
2397 2398 So we truncate the revlog on the first of these revisions, and
2398 2399 trust that the caller has saved the revisions that shouldn't be
2399 2400 removed and that it'll re-add them after this truncation.
2400 2401 """
2401 2402 if len(self) == 0:
2402 2403 return
2403 2404
2404 2405 rev, _ = self.getstrippoint(minlink)
2405 2406 if rev == len(self):
2406 2407 return
2407 2408
2408 2409 # first truncate the files on disk
2409 2410 end = self.start(rev)
2410 2411 if not self._inline:
2411 2412 transaction.add(self.datafile, end)
2412 2413 end = rev * self._io.size
2413 2414 else:
2414 2415 end += rev * self._io.size
2415 2416
2416 2417 transaction.add(self.indexfile, end)
2417 2418
2418 2419 # then reset internal state in memory to forget those revisions
2419 2420 self._cache = None
2420 2421 self._chaininfocache = {}
2421 2422 self._chunkclear()
2422 2423 for x in xrange(rev, len(self)):
2423 2424 del self.nodemap[self.node(x)]
2424 2425
2425 2426 del self.index[rev:-1]
2426 2427 self._nodepos = None
2427 2428
2428 2429 def checksize(self):
2429 2430 expected = 0
2430 2431 if len(self):
2431 2432 expected = max(0, self.end(len(self) - 1))
2432 2433
2433 2434 try:
2434 2435 with self._datafp() as f:
2435 2436 f.seek(0, 2)
2436 2437 actual = f.tell()
2437 2438 dd = actual - expected
2438 2439 except IOError as inst:
2439 2440 if inst.errno != errno.ENOENT:
2440 2441 raise
2441 2442 dd = 0
2442 2443
2443 2444 try:
2444 2445 f = self.opener(self.indexfile)
2445 2446 f.seek(0, 2)
2446 2447 actual = f.tell()
2447 2448 f.close()
2448 2449 s = self._io.size
2449 2450 i = max(0, actual // s)
2450 2451 di = actual - (i * s)
2451 2452 if self._inline:
2452 2453 databytes = 0
2453 2454 for r in self:
2454 2455 databytes += max(0, self.length(r))
2455 2456 dd = 0
2456 2457 di = actual - len(self) * s - databytes
2457 2458 except IOError as inst:
2458 2459 if inst.errno != errno.ENOENT:
2459 2460 raise
2460 2461 di = 0
2461 2462
2462 2463 return (dd, di)
2463 2464
2464 2465 def files(self):
2465 2466 res = [self.indexfile]
2466 2467 if not self._inline:
2467 2468 res.append(self.datafile)
2468 2469 return res
2469 2470
2470 2471 DELTAREUSEALWAYS = 'always'
2471 2472 DELTAREUSESAMEREVS = 'samerevs'
2472 2473 DELTAREUSENEVER = 'never'
2473 2474
2474 2475 DELTAREUSEFULLADD = 'fulladd'
2475 2476
2476 2477 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2477 2478
2478 2479 def clone(self, tr, destrevlog, addrevisioncb=None,
2479 2480 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2480 2481 """Copy this revlog to another, possibly with format changes.
2481 2482
2482 2483 The destination revlog will contain the same revisions and nodes.
2483 2484 However, it may not be bit-for-bit identical due to e.g. delta encoding
2484 2485 differences.
2485 2486
2486 2487 The ``deltareuse`` argument control how deltas from the existing revlog
2487 2488 are preserved in the destination revlog. The argument can have the
2488 2489 following values:
2489 2490
2490 2491 DELTAREUSEALWAYS
2491 2492 Deltas will always be reused (if possible), even if the destination
2492 2493 revlog would not select the same revisions for the delta. This is the
2493 2494 fastest mode of operation.
2494 2495 DELTAREUSESAMEREVS
2495 2496 Deltas will be reused if the destination revlog would pick the same
2496 2497 revisions for the delta. This mode strikes a balance between speed
2497 2498 and optimization.
2498 2499 DELTAREUSENEVER
2499 2500 Deltas will never be reused. This is the slowest mode of execution.
2500 2501 This mode can be used to recompute deltas (e.g. if the diff/delta
2501 2502 algorithm changes).
2502 2503
2503 2504 Delta computation can be slow, so the choice of delta reuse policy can
2504 2505 significantly affect run time.
2505 2506
2506 2507 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2507 2508 two extremes. Deltas will be reused if they are appropriate. But if the
2508 2509 delta could choose a better revision, it will do so. This means if you
2509 2510 are converting a non-generaldelta revlog to a generaldelta revlog,
2510 2511 deltas will be recomputed if the delta's parent isn't a parent of the
2511 2512 revision.
2512 2513
2513 2514 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2514 2515 controls whether to compute deltas against both parents for merges.
2515 2516 If not specified, the destination revlog's current setting is used.
2516 2517 """
2517 2518 if deltareuse not in self.DELTAREUSEALL:
2518 2519 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2519 2520
2520 2521 if len(destrevlog):
2521 2522 raise ValueError(_('destination revlog is not empty'))
2522 2523
2523 2524 if getattr(self, 'filteredrevs', None):
2524 2525 raise ValueError(_('source revlog has filtered revisions'))
2525 2526 if getattr(destrevlog, 'filteredrevs', None):
2526 2527 raise ValueError(_('destination revlog has filtered revisions'))
2527 2528
2528 2529 # lazydeltabase controls whether to reuse a cached delta, if possible.
2529 2530 oldlazydeltabase = destrevlog._lazydeltabase
2530 2531 oldamd = destrevlog._aggressivemergedeltas
2531 2532
2532 2533 try:
2533 2534 if deltareuse == self.DELTAREUSEALWAYS:
2534 2535 destrevlog._lazydeltabase = True
2535 2536 elif deltareuse == self.DELTAREUSESAMEREVS:
2536 2537 destrevlog._lazydeltabase = False
2537 2538
2538 2539 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2539 2540
2540 2541 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2541 2542 self.DELTAREUSESAMEREVS)
2542 2543
2543 2544 deltacomputer = _deltacomputer(destrevlog)
2544 2545 index = self.index
2545 2546 for rev in self:
2546 2547 entry = index[rev]
2547 2548
2548 2549 # Some classes override linkrev to take filtered revs into
2549 2550 # account. Use raw entry from index.
2550 2551 flags = entry[0] & 0xffff
2551 2552 linkrev = entry[4]
2552 2553 p1 = index[entry[5]][7]
2553 2554 p2 = index[entry[6]][7]
2554 2555 node = entry[7]
2555 2556
2556 2557 # (Possibly) reuse the delta from the revlog if allowed and
2557 2558 # the revlog chunk is a delta.
2558 2559 cachedelta = None
2559 2560 rawtext = None
2560 2561 if populatecachedelta:
2561 2562 dp = self.deltaparent(rev)
2562 2563 if dp != nullrev:
2563 2564 cachedelta = (dp, bytes(self._chunk(rev)))
2564 2565
2565 2566 if not cachedelta:
2566 2567 rawtext = self.revision(rev, raw=True)
2567 2568
2568 2569
2569 2570 if deltareuse == self.DELTAREUSEFULLADD:
2570 2571 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2571 2572 cachedelta=cachedelta,
2572 2573 node=node, flags=flags,
2573 2574 deltacomputer=deltacomputer)
2574 2575 else:
2575 2576 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2576 2577 checkambig=False)
2577 2578 dfh = None
2578 2579 if not destrevlog._inline:
2579 2580 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2580 2581 try:
2581 2582 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2582 2583 p2, flags, cachedelta, ifh, dfh,
2583 2584 deltacomputer=deltacomputer)
2584 2585 finally:
2585 2586 if dfh:
2586 2587 dfh.close()
2587 2588 ifh.close()
2588 2589
2589 2590 if addrevisioncb:
2590 2591 addrevisioncb(self, rev, node)
2591 2592 finally:
2592 2593 destrevlog._lazydeltabase = oldlazydeltabase
2593 2594 destrevlog._aggressivemergedeltas = oldamd
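A hedged usage sketch for clone() with the same-revs delta policy; srcrevlog, destrevlog and tr (an open transaction) are assumed to exist, so this is not a complete program.

    def copyrevlog(srcrevlog, destrevlog, tr):
        # reuse deltas only when the destination would pick the same revisions
        srcrevlog.clone(tr, destrevlog,
                        deltareuse=srcrevlog.DELTAREUSESAMEREVS,
                        aggressivemergedeltas=None)  # keep destination's setting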
@@ -1,1566 +1,1566
1 1 # scmutil.py - Mercurial core utility functions
2 2 #
3 3 # Copyright Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import glob
12 12 import hashlib
13 13 import os
14 14 import re
15 15 import socket
16 16 import subprocess
17 17 import weakref
18 18
19 19 from .i18n import _
20 20 from .node import (
21 21 bin,
22 22 hex,
23 23 nullid,
24 24 short,
25 25 wdirid,
26 26 wdirrev,
27 27 )
28 28
29 29 from . import (
30 30 encoding,
31 31 error,
32 32 match as matchmod,
33 33 obsolete,
34 34 obsutil,
35 35 pathutil,
36 36 phases,
37 37 pycompat,
38 38 revsetlang,
39 39 similar,
40 40 url,
41 41 util,
42 42 vfs,
43 43 )
44 44
45 45 from .utils import (
46 46 procutil,
47 47 stringutil,
48 48 )
49 49
50 50 if pycompat.iswindows:
51 51 from . import scmwindows as scmplatform
52 52 else:
53 53 from . import scmposix as scmplatform
54 54
55 55 termsize = scmplatform.termsize
56 56
57 57 class status(tuple):
58 58 '''Named tuple with a list of files per status. The 'deleted', 'unknown'
59 59 and 'ignored' properties are only relevant to the working copy.
60 60 '''
61 61
62 62 __slots__ = ()
63 63
64 64 def __new__(cls, modified, added, removed, deleted, unknown, ignored,
65 65 clean):
66 66 return tuple.__new__(cls, (modified, added, removed, deleted, unknown,
67 67 ignored, clean))
68 68
69 69 @property
70 70 def modified(self):
71 71 '''files that have been modified'''
72 72 return self[0]
73 73
74 74 @property
75 75 def added(self):
76 76 '''files that have been added'''
77 77 return self[1]
78 78
79 79 @property
80 80 def removed(self):
81 81 '''files that have been removed'''
82 82 return self[2]
83 83
84 84 @property
85 85 def deleted(self):
86 86 '''files that are in the dirstate, but have been deleted from the
87 87 working copy (aka "missing")
88 88 '''
89 89 return self[3]
90 90
91 91 @property
92 92 def unknown(self):
93 93 '''files not in the dirstate that are not ignored'''
94 94 return self[4]
95 95
96 96 @property
97 97 def ignored(self):
98 98 '''files not in the dirstate that are ignored (by _dirignore())'''
99 99 return self[5]
100 100
101 101 @property
102 102 def clean(self):
103 103 '''files that have not been modified'''
104 104 return self[6]
105 105
106 106 def __repr__(self, *args, **kwargs):
107 107 return (('<status modified=%r, added=%r, removed=%r, deleted=%r, '
108 108 'unknown=%r, ignored=%r, clean=%r>') % self)
109 109
110 110 def itersubrepos(ctx1, ctx2):
111 111 """find subrepos in ctx1 or ctx2"""
112 112 # Create a (subpath, ctx) mapping where we prefer subpaths from
113 113 # ctx1. The subpaths from ctx2 are important when the .hgsub file
114 114 # has been modified (in ctx2) but not yet committed (in ctx1).
115 115 subpaths = dict.fromkeys(ctx2.substate, ctx2)
116 116 subpaths.update(dict.fromkeys(ctx1.substate, ctx1))
117 117
118 118 missing = set()
119 119
120 120 for subpath in ctx2.substate:
121 121 if subpath not in ctx1.substate:
122 122 del subpaths[subpath]
123 123 missing.add(subpath)
124 124
125 125 for subpath, ctx in sorted(subpaths.iteritems()):
126 126 yield subpath, ctx.sub(subpath)
127 127
128 128 # Yield an empty subrepo based on ctx1 for anything only in ctx2. That way,
129 129 # status and diff will have an accurate result when it does
130 130 # 'sub.{status|diff}(rev2)'. Otherwise, the ctx2 subrepo is compared
131 131 # against itself.
132 132 for subpath in missing:
133 133 yield subpath, ctx2.nullsub(subpath, ctx1)
134 134
135 135 def nochangesfound(ui, repo, excluded=None):
136 136 '''Report no changes for push/pull, excluded is None or a list of
137 137 nodes excluded from the push/pull.
138 138 '''
139 139 secretlist = []
140 140 if excluded:
141 141 for n in excluded:
142 142 ctx = repo[n]
143 143 if ctx.phase() >= phases.secret and not ctx.extinct():
144 144 secretlist.append(n)
145 145
146 146 if secretlist:
147 147 ui.status(_("no changes found (ignored %d secret changesets)\n")
148 148 % len(secretlist))
149 149 else:
150 150 ui.status(_("no changes found\n"))
151 151
152 152 def callcatch(ui, func):
153 153 """call func() with global exception handling
154 154
155 155 return func() if no exception happens. otherwise do some error handling
156 156 and return an exit code accordingly. does not handle all exceptions.
157 157 """
158 158 try:
159 159 try:
160 160 return func()
161 161 except: # re-raises
162 162 ui.traceback()
163 163 raise
164 164 # Global exception handling, alphabetically
165 165 # Mercurial-specific first, followed by built-in and library exceptions
166 166 except error.LockHeld as inst:
167 167 if inst.errno == errno.ETIMEDOUT:
168 168 reason = _('timed out waiting for lock held by %r') % inst.locker
169 169 else:
170 170 reason = _('lock held by %r') % inst.locker
171 171 ui.warn(_("abort: %s: %s\n")
172 172 % (inst.desc or stringutil.forcebytestr(inst.filename), reason))
173 173 if not inst.locker:
174 174 ui.warn(_("(lock might be very busy)\n"))
175 175 except error.LockUnavailable as inst:
176 176 ui.warn(_("abort: could not lock %s: %s\n") %
177 177 (inst.desc or stringutil.forcebytestr(inst.filename),
178 178 encoding.strtolocal(inst.strerror)))
179 179 except error.OutOfBandError as inst:
180 180 if inst.args:
181 181 msg = _("abort: remote error:\n")
182 182 else:
183 183 msg = _("abort: remote error\n")
184 184 ui.warn(msg)
185 185 if inst.args:
186 186 ui.warn(''.join(inst.args))
187 187 if inst.hint:
188 188 ui.warn('(%s)\n' % inst.hint)
189 189 except error.RepoError as inst:
190 190 ui.warn(_("abort: %s!\n") % inst)
191 191 if inst.hint:
192 192 ui.warn(_("(%s)\n") % inst.hint)
193 193 except error.ResponseError as inst:
194 194 ui.warn(_("abort: %s") % inst.args[0])
195 195 msg = inst.args[1]
196 196 if isinstance(msg, type(u'')):
197 197 msg = pycompat.sysbytes(msg)
198 198 if not isinstance(msg, bytes):
199 199 ui.warn(" %r\n" % (msg,))
200 200 elif not msg:
201 201 ui.warn(_(" empty string\n"))
202 202 else:
203 203 ui.warn("\n%r\n" % stringutil.ellipsis(msg))
204 204 except error.CensoredNodeError as inst:
205 205 ui.warn(_("abort: file censored %s!\n") % inst)
206 206 except error.RevlogError as inst:
207 207 ui.warn(_("abort: %s!\n") % inst)
208 208 except error.InterventionRequired as inst:
209 209 ui.warn("%s\n" % inst)
210 210 if inst.hint:
211 211 ui.warn(_("(%s)\n") % inst.hint)
212 212 return 1
213 213 except error.WdirUnsupported:
214 214 ui.warn(_("abort: working directory revision cannot be specified\n"))
215 215 except error.Abort as inst:
216 216 ui.warn(_("abort: %s\n") % inst)
217 217 if inst.hint:
218 218 ui.warn(_("(%s)\n") % inst.hint)
219 219 except ImportError as inst:
220 220 ui.warn(_("abort: %s!\n") % stringutil.forcebytestr(inst))
221 221 m = stringutil.forcebytestr(inst).split()[-1]
222 222 if m in "mpatch bdiff".split():
223 223 ui.warn(_("(did you forget to compile extensions?)\n"))
224 224 elif m in "zlib".split():
225 225 ui.warn(_("(is your Python install correct?)\n"))
226 226 except IOError as inst:
227 227 if util.safehasattr(inst, "code"):
228 228 ui.warn(_("abort: %s\n") % stringutil.forcebytestr(inst))
229 229 elif util.safehasattr(inst, "reason"):
230 230 try: # usually it is in the form (errno, strerror)
231 231 reason = inst.reason.args[1]
232 232 except (AttributeError, IndexError):
233 233 # it might be anything, for example a string
234 234 reason = inst.reason
235 235 if isinstance(reason, unicode):
236 236 # SSLError of Python 2.7.9 contains a unicode
237 237 reason = encoding.unitolocal(reason)
238 238 ui.warn(_("abort: error: %s\n") % reason)
239 239 elif (util.safehasattr(inst, "args")
240 240 and inst.args and inst.args[0] == errno.EPIPE):
241 241 pass
242 242 elif getattr(inst, "strerror", None):
243 243 if getattr(inst, "filename", None):
244 244 ui.warn(_("abort: %s: %s\n") % (
245 245 encoding.strtolocal(inst.strerror),
246 246 stringutil.forcebytestr(inst.filename)))
247 247 else:
248 248 ui.warn(_("abort: %s\n") % encoding.strtolocal(inst.strerror))
249 249 else:
250 250 raise
251 251 except OSError as inst:
252 252 if getattr(inst, "filename", None) is not None:
253 253 ui.warn(_("abort: %s: '%s'\n") % (
254 254 encoding.strtolocal(inst.strerror),
255 255 stringutil.forcebytestr(inst.filename)))
256 256 else:
257 257 ui.warn(_("abort: %s\n") % encoding.strtolocal(inst.strerror))
258 258 except MemoryError:
259 259 ui.warn(_("abort: out of memory\n"))
260 260 except SystemExit as inst:
261 261 # Commands shouldn't sys.exit directly, but give a return code.
262 262 # Just in case catch this and pass exit code to caller.
263 263 return inst.code
264 264 except socket.error as inst:
265 265 ui.warn(_("abort: %s\n") % stringutil.forcebytestr(inst.args[-1]))
266 266
267 267 return -1
268 268
269 269 def checknewlabel(repo, lbl, kind):
270 270 # Do not use the "kind" parameter in ui output.
271 271 # It makes strings difficult to translate.
272 272 if lbl in ['tip', '.', 'null']:
273 273 raise error.Abort(_("the name '%s' is reserved") % lbl)
274 274 for c in (':', '\0', '\n', '\r'):
275 275 if c in lbl:
276 276 raise error.Abort(
277 277 _("%r cannot be used in a name") % pycompat.bytestr(c))
278 278 try:
279 279 int(lbl)
280 280 raise error.Abort(_("cannot use an integer as a name"))
281 281 except ValueError:
282 282 pass
283 283 if lbl.strip() != lbl:
284 284 raise error.Abort(_("leading or trailing whitespace in name %r") % lbl)
285 285
286 286 def checkfilename(f):
287 287 '''Check that the filename f is an acceptable filename for a tracked file'''
288 288 if '\r' in f or '\n' in f:
289 289 raise error.Abort(_("'\\n' and '\\r' disallowed in filenames: %r") % f)
290 290
291 291 def checkportable(ui, f):
292 292 '''Check if filename f is portable and warn or abort depending on config'''
293 293 checkfilename(f)
294 294 abort, warn = checkportabilityalert(ui)
295 295 if abort or warn:
296 296 msg = util.checkwinfilename(f)
297 297 if msg:
298 298 msg = "%s: %s" % (msg, procutil.shellquote(f))
299 299 if abort:
300 300 raise error.Abort(msg)
301 301 ui.warn(_("warning: %s\n") % msg)
302 302
303 303 def checkportabilityalert(ui):
304 304 '''check if the user's config requests nothing, a warning, or abort for
305 305 non-portable filenames'''
306 306 val = ui.config('ui', 'portablefilenames')
307 307 lval = val.lower()
308 308 bval = stringutil.parsebool(val)
309 309 abort = pycompat.iswindows or lval == 'abort'
310 310 warn = bval or lval == 'warn'
311 311 if bval is None and not (warn or abort or lval == 'ignore'):
312 312 raise error.ConfigError(
313 313 _("ui.portablefilenames value is invalid ('%s')") % val)
314 314 return abort, warn
315 315
316 316 class casecollisionauditor(object):
317 317 def __init__(self, ui, abort, dirstate):
318 318 self._ui = ui
319 319 self._abort = abort
320 320 allfiles = '\0'.join(dirstate._map)
321 321 self._loweredfiles = set(encoding.lower(allfiles).split('\0'))
322 322 self._dirstate = dirstate
323 323 # The purpose of _newfiles is so that we don't complain about
324 324 # case collisions if someone were to call this object with the
325 325 # same filename twice.
326 326 self._newfiles = set()
327 327
328 328 def __call__(self, f):
329 329 if f in self._newfiles:
330 330 return
331 331 fl = encoding.lower(f)
332 332 if fl in self._loweredfiles and f not in self._dirstate:
333 333 msg = _('possible case-folding collision for %s') % f
334 334 if self._abort:
335 335 raise error.Abort(msg)
336 336 self._ui.warn(_("warning: %s\n") % msg)
337 337 self._loweredfiles.add(fl)
338 338 self._newfiles.add(f)
339 339
340 340 def filteredhash(repo, maxrev):
341 341 """build hash of filtered revisions in the current repoview.
342 342
343 343 Multiple caches perform up-to-date validation by checking that the
344 344 tiprev and tipnode stored in the cache file match the current repository.
345 345 However, this is not sufficient for validating repoviews because the set
346 346 of revisions in the view may change without the repository tiprev and
347 347 tipnode changing.
348 348
349 349 This function hashes all the revs filtered from the view and returns
350 350 that SHA-1 digest.
351 351 """
352 352 cl = repo.changelog
353 353 if not cl.filteredrevs:
354 354 return None
355 355 key = None
356 356 revs = sorted(r for r in cl.filteredrevs if r <= maxrev)
357 357 if revs:
358 358 s = hashlib.sha1()
359 359 for rev in revs:
360 360 s.update('%d;' % rev)
361 361 key = s.digest()
362 362 return key
363 363
364 364 def walkrepos(path, followsym=False, seen_dirs=None, recurse=False):
365 365 '''yield every hg repository under path, always recursively.
366 366 The recurse flag will only control recursion into repo working dirs'''
367 367 def errhandler(err):
368 368 if err.filename == path:
369 369 raise err
370 370 samestat = getattr(os.path, 'samestat', None)
371 371 if followsym and samestat is not None:
372 372 def adddir(dirlst, dirname):
373 373 dirstat = os.stat(dirname)
374 374 match = any(samestat(dirstat, lstdirstat) for lstdirstat in dirlst)
375 375 if not match:
376 376 dirlst.append(dirstat)
377 377 return not match
378 378 else:
379 379 followsym = False
380 380
381 381 if (seen_dirs is None) and followsym:
382 382 seen_dirs = []
383 383 adddir(seen_dirs, path)
384 384 for root, dirs, files in os.walk(path, topdown=True, onerror=errhandler):
385 385 dirs.sort()
386 386 if '.hg' in dirs:
387 387 yield root # found a repository
388 388 qroot = os.path.join(root, '.hg', 'patches')
389 389 if os.path.isdir(os.path.join(qroot, '.hg')):
390 390 yield qroot # we have a patch queue repo here
391 391 if recurse:
392 392 # avoid recursing inside the .hg directory
393 393 dirs.remove('.hg')
394 394 else:
395 395 dirs[:] = [] # don't descend further
396 396 elif followsym:
397 397 newdirs = []
398 398 for d in dirs:
399 399 fname = os.path.join(root, d)
400 400 if adddir(seen_dirs, fname):
401 401 if os.path.islink(fname):
402 402 for hgname in walkrepos(fname, True, seen_dirs):
403 403 yield hgname
404 404 else:
405 405 newdirs.append(d)
406 406 dirs[:] = newdirs
407 407
408 408 def binnode(ctx):
409 409 """Return binary node id for a given basectx"""
410 410 node = ctx.node()
411 411 if node is None:
412 412 return wdirid
413 413 return node
414 414
415 415 def intrev(ctx):
416 416 """Return integer for a given basectx that can be used in comparison or
417 417 arithmetic operation"""
418 418 rev = ctx.rev()
419 419 if rev is None:
420 420 return wdirrev
421 421 return rev
422 422
423 423 def formatchangeid(ctx):
424 424 """Format changectx as '{rev}:{node|formatnode}', which is the default
425 425 template provided by logcmdutil.changesettemplater"""
426 426 repo = ctx.repo()
427 427 return formatrevnode(repo.ui, intrev(ctx), binnode(ctx))
428 428
429 429 def formatrevnode(ui, rev, node):
430 430 """Format given revision and node depending on the current verbosity"""
431 431 if ui.debugflag:
432 432 hexfunc = hex
433 433 else:
434 434 hexfunc = short
435 435 return '%d:%s' % (rev, hexfunc(node))
436 436
437 437 def resolvehexnodeidprefix(repo, prefix):
438 438 # Uses unfiltered repo because it's faster when prefix is ambiguous.
439 439 # This matches the shortesthexnodeidprefix() function below.
440 440 node = repo.unfiltered().changelog._partialmatch(prefix)
441 441 if node is None:
442 442 return
443 443 repo.changelog.rev(node) # make sure node isn't filtered
444 444 return node
445 445
446 446 def shortesthexnodeidprefix(repo, node, minlength=1):
447 447 """Find the shortest unambiguous prefix that matches hexnode."""
448 448 # _partialmatch() of filtered changelog could take O(len(repo)) time,
449 449 # which would be unacceptably slow. so we look for hash collision in
450 450 # unfiltered space, which means some hashes may be slightly longer.
451 return repo.unfiltered().changelog.shortest(hex(node), minlength)
451 return repo.unfiltered().changelog.shortest(node, minlength)
452 452
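A usage sketch reflecting the API change in this commit: callers now hand changelog.shortest() the binary nodeid directly instead of hexlifying it first. Here repo and ctx are assumed to be a repository object and a changectx.

    def shortid(repo, ctx, minlength=1):
        # after this change: pass the 20-byte binary nodeid straight through
        return repo.unfiltered().changelog.shortest(ctx.node(), minlength)
        # pre-change callers had to do:
        #   repo.unfiltered().changelog.shortest(hex(ctx.node()), minlength)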
453 453 def isrevsymbol(repo, symbol):
454 454 """Checks if a symbol exists in the repo.
455 455
456 456 See revsymbol() for details. Raises error.LookupError if the symbol is an
457 457 ambiguous nodeid prefix.
458 458 """
459 459 try:
460 460 revsymbol(repo, symbol)
461 461 return True
462 462 except error.RepoLookupError:
463 463 return False
464 464
465 465 def revsymbol(repo, symbol):
466 466 """Returns a context given a single revision symbol (as string).
467 467
468 468 This is similar to revsingle(), but accepts only a single revision symbol,
469 469 i.e. things like ".", "tip", "1234", "deadbeef", "my-bookmark" work, but
470 470 not "max(public())".
471 471 """
472 472 if not isinstance(symbol, bytes):
473 473 msg = ("symbol (%s of type %s) was not a string, did you mean "
474 474 "repo[symbol]?" % (symbol, type(symbol)))
475 475 raise error.ProgrammingError(msg)
476 476 try:
477 477 if symbol in ('.', 'tip', 'null'):
478 478 return repo[symbol]
479 479
480 480 try:
481 481 r = int(symbol)
482 482 if '%d' % r != symbol:
483 483 raise ValueError
484 484 l = len(repo.changelog)
485 485 if r < 0:
486 486 r += l
487 487 if r < 0 or r >= l and r != wdirrev:
488 488 raise ValueError
489 489 return repo[r]
490 490 except error.FilteredIndexError:
491 491 raise
492 492 except (ValueError, OverflowError, IndexError):
493 493 pass
494 494
495 495 if len(symbol) == 40:
496 496 try:
497 497 node = bin(symbol)
498 498 rev = repo.changelog.rev(node)
499 499 return repo[rev]
500 500 except error.FilteredLookupError:
501 501 raise
502 502 except (TypeError, LookupError):
503 503 pass
504 504
505 505 # look up bookmarks through the name interface
506 506 try:
507 507 node = repo.names.singlenode(repo, symbol)
508 508 rev = repo.changelog.rev(node)
509 509 return repo[rev]
510 510 except KeyError:
511 511 pass
512 512
513 513 node = resolvehexnodeidprefix(repo, symbol)
514 514 if node is not None:
515 515 rev = repo.changelog.rev(node)
516 516 return repo[rev]
517 517
518 518 raise error.RepoLookupError(_("unknown revision '%s'") % symbol)
519 519
520 520 except error.WdirUnsupported:
521 521 return repo[None]
522 522 except (error.FilteredIndexError, error.FilteredLookupError,
523 523 error.FilteredRepoLookupError):
524 524 raise _filterederror(repo, symbol)
525 525
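# Illustrative usage sketch (hypothetical, not part of the module above):
# revsymbol() resolves one user-supplied symbol to a changectx. Assuming an
# already-opened repo object:
#
#     ctx = scmutil.revsymbol(repo, '.')            # working copy parent
#     ctx = scmutil.revsymbol(repo, 'deadbeef')     # nodeid prefix
#     ctx = scmutil.revsymbol(repo, 'my-bookmark')  # bookmark or other name
#
# The symbol must be a bytestring (ProgrammingError otherwise), and full
# revset expressions such as 'max(public())' are not supported; use
# revrange() for those.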
526 526 def _filterederror(repo, changeid):
527 527 """build an exception to be raised about a filtered changeid
528 528
529 529 This is extracted into a function to help extensions (e.g. evolve)
530 530 experiment with various message variants."""
531 531 if repo.filtername.startswith('visible'):
532 532
533 533 # Check if the changeset is obsolete
534 534 unfilteredrepo = repo.unfiltered()
535 535 ctx = revsymbol(unfilteredrepo, changeid)
536 536
537 537 # If the changeset is obsolete, enrich the message with the reason
538 538 # that made this changeset not visible
539 539 if ctx.obsolete():
540 540 msg = obsutil._getfilteredreason(repo, changeid, ctx)
541 541 else:
542 542 msg = _("hidden revision '%s'") % changeid
543 543
544 544 hint = _('use --hidden to access hidden revisions')
545 545
546 546 return error.FilteredRepoLookupError(msg, hint=hint)
547 547 msg = _("filtered revision '%s' (not in '%s' subset)")
548 548 msg %= (changeid, repo.filtername)
549 549 return error.FilteredRepoLookupError(msg)
550 550
551 551 def revsingle(repo, revspec, default='.', localalias=None):
552 552 if not revspec and revspec != 0:
553 553 return repo[default]
554 554
555 555 l = revrange(repo, [revspec], localalias=localalias)
556 556 if not l:
557 557 raise error.Abort(_('empty revision set'))
558 558 return repo[l.last()]
559 559
560 560 def _pairspec(revspec):
561 561 tree = revsetlang.parse(revspec)
562 562 return tree and tree[0] in ('range', 'rangepre', 'rangepost', 'rangeall')
563 563
564 564 def revpairnodes(repo, revs):
565 565 repo.ui.deprecwarn("revpairnodes is deprecated, please use revpair", "4.6")
566 566 ctx1, ctx2 = revpair(repo, revs)
567 567 return ctx1.node(), ctx2.node()
568 568
569 569 def revpair(repo, revs):
570 570 if not revs:
571 571 return repo['.'], repo[None]
572 572
573 573 l = revrange(repo, revs)
574 574
575 575 if not l:
576 576 first = second = None
577 577 elif l.isascending():
578 578 first = l.min()
579 579 second = l.max()
580 580 elif l.isdescending():
581 581 first = l.max()
582 582 second = l.min()
583 583 else:
584 584 first = l.first()
585 585 second = l.last()
586 586
587 587 if first is None:
588 588 raise error.Abort(_('empty revision range'))
589 589 if (first == second and len(revs) >= 2
590 590 and not all(revrange(repo, [r]) for r in revs)):
591 591 raise error.Abort(_('empty revision on one side of range'))
592 592
593 593 # if top-level is range expression, the result must always be a pair
594 594 if first == second and len(revs) == 1 and not _pairspec(revs[0]):
595 595 return repo[first], repo[None]
596 596
597 597 return repo[first], repo[second]
598 598
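# Illustrative usage sketch (hypothetical, not part of the module above):
# revpair() is what commands such as 'hg diff' use to turn their -r options
# into two contexts. Assuming an already-opened repo object:
#
#     ctx1, ctx2 = scmutil.revpair(repo, [])          # '.' and the working copy
#     ctx1, ctx2 = scmutil.revpair(repo, ['1', '3'])  # two explicit revisions
#     ctx1, ctx2 = scmutil.revpair(repo, ['2::5'])    # endpoints of a range
#
# With a single non-range spec the second context is the working copy.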
599 599 def revrange(repo, specs, localalias=None):
600 600 """Execute 1 to many revsets and return the union.
601 601
602 602 This is the preferred mechanism for executing revsets using user-specified
603 603 config options, such as revset aliases.
604 604
605 605 The revsets specified by ``specs`` will be executed via a chained ``OR``
606 606 expression. If ``specs`` is empty, an empty result is returned.
607 607
608 608 ``specs`` can contain integers, in which case they are assumed to be
609 609 revision numbers.
610 610
611 611 It is assumed the revsets are already formatted. If you have arguments
612 612 that need to be expanded in the revset, call ``revsetlang.formatspec()``
613 613 and pass the result as an element of ``specs``.
614 614
615 615 Specifying a single revset is allowed.
616 616
617 617 Returns a ``revset.abstractsmartset`` which is a list-like interface over
618 618 integer revisions.
619 619 """
620 620 allspecs = []
621 621 for spec in specs:
622 622 if isinstance(spec, int):
623 623 spec = revsetlang.formatspec('rev(%d)', spec)
624 624 allspecs.append(spec)
625 625 return repo.anyrevs(allspecs, user=True, localalias=localalias)
626 626
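# Illustrative usage sketch (hypothetical, not part of the module above):
# revrange() unions one or more revsets. Arguments that need quoting should
# be expanded with revsetlang.formatspec() first:
#
#     from mercurial import revsetlang
#     spec = revsetlang.formatspec('branch(%s) and not obsolete()', 'default')
#     revs = scmutil.revrange(repo, [spec, 'tip'])
#     for rev in revs:            # a smartset of integer revision numbers
#         print(rev)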
627 627 def meaningfulparents(repo, ctx):
628 628 """Return list of meaningful (or all if debug) parentrevs for rev.
629 629
630 630 For merges (two non-nullrev revisions) both parents are meaningful.
631 631 Otherwise the first parent revision is considered meaningful if it
632 632 is not the preceding revision.
633 633 """
634 634 parents = ctx.parents()
635 635 if len(parents) > 1:
636 636 return parents
637 637 if repo.ui.debugflag:
638 638 return [parents[0], repo['null']]
639 639 if parents[0].rev() >= intrev(ctx) - 1:
640 640 return []
641 641 return parents
642 642
643 643 def expandpats(pats):
644 644 '''Expand bare globs when running on windows.
645 645 On posix we assume it has already been done by sh.'''
646 646 if not util.expandglobs:
647 647 return list(pats)
648 648 ret = []
649 649 for kindpat in pats:
650 650 kind, pat = matchmod._patsplit(kindpat, None)
651 651 if kind is None:
652 652 try:
653 653 globbed = glob.glob(pat)
654 654 except re.error:
655 655 globbed = [pat]
656 656 if globbed:
657 657 ret.extend(globbed)
658 658 continue
659 659 ret.append(kindpat)
660 660 return ret
661 661
662 662 def matchandpats(ctx, pats=(), opts=None, globbed=False, default='relpath',
663 663 badfn=None):
664 664 '''Return a matcher and the patterns that were used.
665 665 The matcher will warn about bad matches, unless an alternate badfn callback
666 666 is provided.'''
667 667 if pats == ("",):
668 668 pats = []
669 669 if opts is None:
670 670 opts = {}
671 671 if not globbed and default == 'relpath':
672 672 pats = expandpats(pats or [])
673 673
674 674 def bad(f, msg):
675 675 ctx.repo().ui.warn("%s: %s\n" % (m.rel(f), msg))
676 676
677 677 if badfn is None:
678 678 badfn = bad
679 679
680 680 m = ctx.match(pats, opts.get('include'), opts.get('exclude'),
681 681 default, listsubrepos=opts.get('subrepos'), badfn=badfn)
682 682
683 683 if m.always():
684 684 pats = []
685 685 return m, pats
686 686
687 687 def match(ctx, pats=(), opts=None, globbed=False, default='relpath',
688 688 badfn=None):
689 689 '''Return a matcher that will warn about bad matches.'''
690 690 return matchandpats(ctx, pats, opts, globbed, default, badfn=badfn)[0]
691 691
692 692 def matchall(repo):
693 693 '''Return a matcher that will efficiently match everything.'''
694 694 return matchmod.always(repo.root, repo.getcwd())
695 695
696 696 def matchfiles(repo, files, badfn=None):
697 697 '''Return a matcher that will efficiently match exactly these files.'''
698 698 return matchmod.exact(repo.root, repo.getcwd(), files, badfn=badfn)
699 699
700 700 def parsefollowlinespattern(repo, rev, pat, msg):
701 701 """Return a file name from `pat` pattern suitable for usage in followlines
702 702 logic.
703 703 """
704 704 if not matchmod.patkind(pat):
705 705 return pathutil.canonpath(repo.root, repo.getcwd(), pat)
706 706 else:
707 707 ctx = repo[rev]
708 708 m = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=ctx)
709 709 files = [f for f in ctx if m(f)]
710 710 if len(files) != 1:
711 711 raise error.ParseError(msg)
712 712 return files[0]
713 713
714 714 def origpath(ui, repo, filepath):
715 715 '''customize where .orig files are created
716 716
717 717 Fetch user defined path from config file: [ui] origbackuppath = <path>
718 718 Fall back to default (filepath with .orig suffix) if not specified
719 719 '''
720 720 origbackuppath = ui.config('ui', 'origbackuppath')
721 721 if not origbackuppath:
722 722 return filepath + ".orig"
723 723
724 724 # Convert filepath from an absolute path into a path inside the repo.
725 725 filepathfromroot = util.normpath(os.path.relpath(filepath,
726 726 start=repo.root))
727 727
728 728 origvfs = vfs.vfs(repo.wjoin(origbackuppath))
729 729 origbackupdir = origvfs.dirname(filepathfromroot)
730 730 if not origvfs.isdir(origbackupdir) or origvfs.islink(origbackupdir):
731 731 ui.note(_('creating directory: %s\n') % origvfs.join(origbackupdir))
732 732
733 733 # Remove any files that conflict with the backup file's path
734 734 for f in reversed(list(util.finddirs(filepathfromroot))):
735 735 if origvfs.isfileorlink(f):
736 736 ui.note(_('removing conflicting file: %s\n')
737 737 % origvfs.join(f))
738 738 origvfs.unlink(f)
739 739 break
740 740
741 741 origvfs.makedirs(origbackupdir)
742 742
743 743 if origvfs.isdir(filepathfromroot) and not origvfs.islink(filepathfromroot):
744 744 ui.note(_('removing conflicting directory: %s\n')
745 745 % origvfs.join(filepathfromroot))
746 746 origvfs.rmtree(filepathfromroot, forcibly=True)
747 747
748 748 return origvfs.join(filepathfromroot)
749 749
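# Illustrative usage sketch (hypothetical, not part of the module above):
# origpath() honours the [ui] origbackuppath setting. With a configuration
# such as:
#
#     [ui]
#     origbackuppath = .hg/origbackups
#
# a call like scmutil.origpath(repo.ui, repo, repo.wjoin('foo.txt')) returns
# a path under .hg/origbackups/ instead of the default 'foo.txt.orig' next to
# the file.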
750 750 class _containsnode(object):
751 751 """proxy __contains__(node) to container.__contains__ which accepts revs"""
752 752
753 753 def __init__(self, repo, revcontainer):
754 754 self._torev = repo.changelog.rev
755 755 self._revcontains = revcontainer.__contains__
756 756
757 757 def __contains__(self, node):
758 758 return self._revcontains(self._torev(node))
759 759
760 760 def cleanupnodes(repo, replacements, operation, moves=None, metadata=None):
761 761 """do common cleanups when old nodes are replaced by new nodes
762 762
763 763 That includes writing obsmarkers or stripping nodes, and moving bookmarks.
764 764 (we might also want to move working directory parent in the future)
765 765
766 766 By default, bookmark moves are calculated automatically from 'replacements',
767 767 but 'moves' can be used to override that. Also, 'moves' may include
768 768 additional bookmark moves that should not have associated obsmarkers.
769 769
770 770 replacements is {oldnode: [newnode]} or an iterable of nodes if they do not
771 771 have replacements. operation is a string, like "rebase".
772 772
773 773 metadata is a dictionary containing metadata to be stored in obsmarker if
774 774 obsolescence is enabled.
775 775 """
776 776 if not replacements and not moves:
777 777 return
778 778
779 779 # translate mapping's other forms
780 780 if not util.safehasattr(replacements, 'items'):
781 781 replacements = {n: () for n in replacements}
782 782
783 783 # Calculate bookmark movements
784 784 if moves is None:
785 785 moves = {}
786 786 # Unfiltered repo is needed since nodes in replacements might be hidden.
787 787 unfi = repo.unfiltered()
788 788 for oldnode, newnodes in replacements.items():
789 789 if oldnode in moves:
790 790 continue
791 791 if len(newnodes) > 1:
792 792 # usually a split, take the one with biggest rev number
793 793 newnode = next(unfi.set('max(%ln)', newnodes)).node()
794 794 elif len(newnodes) == 0:
795 795 # move bookmark backwards
796 796 roots = list(unfi.set('max((::%n) - %ln)', oldnode,
797 797 list(replacements)))
798 798 if roots:
799 799 newnode = roots[0].node()
800 800 else:
801 801 newnode = nullid
802 802 else:
803 803 newnode = newnodes[0]
804 804 moves[oldnode] = newnode
805 805
806 806 with repo.transaction('cleanup') as tr:
807 807 # Move bookmarks
808 808 bmarks = repo._bookmarks
809 809 bmarkchanges = []
810 810 allnewnodes = [n for ns in replacements.values() for n in ns]
811 811 for oldnode, newnode in moves.items():
812 812 oldbmarks = repo.nodebookmarks(oldnode)
813 813 if not oldbmarks:
814 814 continue
815 815 from . import bookmarks # avoid import cycle
816 816 repo.ui.debug('moving bookmarks %r from %s to %s\n' %
817 817 (util.rapply(pycompat.maybebytestr, oldbmarks),
818 818 hex(oldnode), hex(newnode)))
819 819 # Delete divergent bookmarks being parents of related newnodes
820 820 deleterevs = repo.revs('parents(roots(%ln & (::%n))) - parents(%n)',
821 821 allnewnodes, newnode, oldnode)
822 822 deletenodes = _containsnode(repo, deleterevs)
823 823 for name in oldbmarks:
824 824 bmarkchanges.append((name, newnode))
825 825 for b in bookmarks.divergent2delete(repo, deletenodes, name):
826 826 bmarkchanges.append((b, None))
827 827
828 828 if bmarkchanges:
829 829 bmarks.applychanges(repo, tr, bmarkchanges)
830 830
831 831 # Obsolete or strip nodes
832 832 if obsolete.isenabled(repo, obsolete.createmarkersopt):
833 833 # If a node is already obsoleted, and we want to obsolete it
834 834 # without a successor, skip that obsolete request since it's
835 835 # unnecessary. That's the "if s or not isobs(n)" check below.
836 836 # Also sort the nodes in topological order; that might be useful for
837 837 # some obsstore logic.
838 838 # NOTE: the filtering and sorting might belong to createmarkers.
839 839 isobs = unfi.obsstore.successors.__contains__
840 840 torev = unfi.changelog.rev
841 841 sortfunc = lambda ns: torev(ns[0])
842 842 rels = [(unfi[n], tuple(unfi[m] for m in s))
843 843 for n, s in sorted(replacements.items(), key=sortfunc)
844 844 if s or not isobs(n)]
845 845 if rels:
846 846 obsolete.createmarkers(repo, rels, operation=operation,
847 847 metadata=metadata)
848 848 else:
849 849 from . import repair # avoid import cycle
850 850 tostrip = list(replacements)
851 851 if tostrip:
852 852 repair.delayedstrip(repo.ui, repo, tostrip, operation)
853 853
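# Illustrative usage sketch (hypothetical, not part of the module above):
# a history-rewriting command would typically call cleanupnodes() once at the
# end, mapping each old node to its replacement(s). Here oldnode and newnode
# stand for binary nodeids obtained elsewhere:
#
#     replacements = {oldnode: (newnode,)}   # several new nodes for a split,
#                                            # an empty tuple for a prune
#     scmutil.cleanupnodes(repo, replacements, operation='amend')
#
# Depending on configuration this either creates obsolescence markers or
# strips the old nodes, and it moves any bookmarks onto the replacements.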
854 854 def addremove(repo, matcher, prefix, opts=None):
855 855 if opts is None:
856 856 opts = {}
857 857 m = matcher
858 858 dry_run = opts.get('dry_run')
859 859 try:
860 860 similarity = float(opts.get('similarity') or 0)
861 861 except ValueError:
862 862 raise error.Abort(_('similarity must be a number'))
863 863 if similarity < 0 or similarity > 100:
864 864 raise error.Abort(_('similarity must be between 0 and 100'))
865 865 similarity /= 100.0
866 866
867 867 ret = 0
868 868 join = lambda f: os.path.join(prefix, f)
869 869
870 870 wctx = repo[None]
871 871 for subpath in sorted(wctx.substate):
872 872 submatch = matchmod.subdirmatcher(subpath, m)
873 873 if opts.get('subrepos') or m.exact(subpath) or any(submatch.files()):
874 874 sub = wctx.sub(subpath)
875 875 try:
876 876 if sub.addremove(submatch, prefix, opts):
877 877 ret = 1
878 878 except error.LookupError:
879 879 repo.ui.status(_("skipping missing subrepository: %s\n")
880 880 % join(subpath))
881 881
882 882 rejected = []
883 883 def badfn(f, msg):
884 884 if f in m.files():
885 885 m.bad(f, msg)
886 886 rejected.append(f)
887 887
888 888 badmatch = matchmod.badmatch(m, badfn)
889 889 added, unknown, deleted, removed, forgotten = _interestingfiles(repo,
890 890 badmatch)
891 891
892 892 unknownset = set(unknown + forgotten)
893 893 toprint = unknownset.copy()
894 894 toprint.update(deleted)
895 895 for abs in sorted(toprint):
896 896 if repo.ui.verbose or not m.exact(abs):
897 897 if abs in unknownset:
898 898 status = _('adding %s\n') % m.uipath(abs)
899 899 else:
900 900 status = _('removing %s\n') % m.uipath(abs)
901 901 repo.ui.status(status)
902 902
903 903 renames = _findrenames(repo, m, added + unknown, removed + deleted,
904 904 similarity)
905 905
906 906 if not dry_run:
907 907 _markchanges(repo, unknown + forgotten, deleted, renames)
908 908
909 909 for f in rejected:
910 910 if f in m.files():
911 911 return 1
912 912 return ret
913 913
914 914 def marktouched(repo, files, similarity=0.0):
915 915 '''Assert that files have somehow been operated upon. Files are relative to
916 916 the repo root.'''
917 917 m = matchfiles(repo, files, badfn=lambda x, y: rejected.append(x))
918 918 rejected = []
919 919
920 920 added, unknown, deleted, removed, forgotten = _interestingfiles(repo, m)
921 921
922 922 if repo.ui.verbose:
923 923 unknownset = set(unknown + forgotten)
924 924 toprint = unknownset.copy()
925 925 toprint.update(deleted)
926 926 for abs in sorted(toprint):
927 927 if abs in unknownset:
928 928 status = _('adding %s\n') % abs
929 929 else:
930 930 status = _('removing %s\n') % abs
931 931 repo.ui.status(status)
932 932
933 933 renames = _findrenames(repo, m, added + unknown, removed + deleted,
934 934 similarity)
935 935
936 936 _markchanges(repo, unknown + forgotten, deleted, renames)
937 937
938 938 for f in rejected:
939 939 if f in m.files():
940 940 return 1
941 941 return 0
942 942
943 943 def _interestingfiles(repo, matcher):
944 944 '''Walk dirstate with matcher, looking for files that addremove would care
945 945 about.
946 946
947 947 This is different from dirstate.status because it doesn't care about
948 948 whether files are modified or clean.'''
949 949 added, unknown, deleted, removed, forgotten = [], [], [], [], []
950 950 audit_path = pathutil.pathauditor(repo.root, cached=True)
951 951
952 952 ctx = repo[None]
953 953 dirstate = repo.dirstate
954 954 walkresults = dirstate.walk(matcher, subrepos=sorted(ctx.substate),
955 955 unknown=True, ignored=False, full=False)
956 956 for abs, st in walkresults.iteritems():
957 957 dstate = dirstate[abs]
958 958 if dstate == '?' and audit_path.check(abs):
959 959 unknown.append(abs)
960 960 elif dstate != 'r' and not st:
961 961 deleted.append(abs)
962 962 elif dstate == 'r' and st:
963 963 forgotten.append(abs)
964 964 # for finding renames
965 965 elif dstate == 'r' and not st:
966 966 removed.append(abs)
967 967 elif dstate == 'a':
968 968 added.append(abs)
969 969
970 970 return added, unknown, deleted, removed, forgotten
971 971
972 972 def _findrenames(repo, matcher, added, removed, similarity):
973 973 '''Find renames from removed files to added ones.'''
974 974 renames = {}
975 975 if similarity > 0:
976 976 for old, new, score in similar.findrenames(repo, added, removed,
977 977 similarity):
978 978 if (repo.ui.verbose or not matcher.exact(old)
979 979 or not matcher.exact(new)):
980 980 repo.ui.status(_('recording removal of %s as rename to %s '
981 981 '(%d%% similar)\n') %
982 982 (matcher.rel(old), matcher.rel(new),
983 983 score * 100))
984 984 renames[new] = old
985 985 return renames
986 986
987 987 def _markchanges(repo, unknown, deleted, renames):
988 988 '''Marks the files in unknown as added, the files in deleted as removed,
989 989 and the files in renames as copied.'''
990 990 wctx = repo[None]
991 991 with repo.wlock():
992 992 wctx.forget(deleted)
993 993 wctx.add(unknown)
994 994 for new, old in renames.iteritems():
995 995 wctx.copy(old, new)
996 996
997 997 def dirstatecopy(ui, repo, wctx, src, dst, dryrun=False, cwd=None):
998 998 """Update the dirstate to reflect the intent of copying src to dst. For
999 999 different reasons it might not end with dst being marked as copied from src.
1000 1000 """
1001 1001 origsrc = repo.dirstate.copied(src) or src
1002 1002 if dst == origsrc: # copying back a copy?
1003 1003 if repo.dirstate[dst] not in 'mn' and not dryrun:
1004 1004 repo.dirstate.normallookup(dst)
1005 1005 else:
1006 1006 if repo.dirstate[origsrc] == 'a' and origsrc == src:
1007 1007 if not ui.quiet:
1008 1008 ui.warn(_("%s has not been committed yet, so no copy "
1009 1009 "data will be stored for %s.\n")
1010 1010 % (repo.pathto(origsrc, cwd), repo.pathto(dst, cwd)))
1011 1011 if repo.dirstate[dst] in '?r' and not dryrun:
1012 1012 wctx.add([dst])
1013 1013 elif not dryrun:
1014 1014 wctx.copy(origsrc, dst)
1015 1015
1016 1016 def readrequires(opener, supported):
1017 1017 '''Reads and parses .hg/requires and checks if all entries found
1018 1018 are in the list of supported features.'''
1019 1019 requirements = set(opener.read("requires").splitlines())
1020 1020 missings = []
1021 1021 for r in requirements:
1022 1022 if r not in supported:
1023 1023 if not r or not r[0:1].isalnum():
1024 1024 raise error.RequirementError(_(".hg/requires file is corrupt"))
1025 1025 missings.append(r)
1026 1026 missings.sort()
1027 1027 if missings:
1028 1028 raise error.RequirementError(
1029 1029 _("repository requires features unknown to this Mercurial: %s")
1030 1030 % " ".join(missings),
1031 1031 hint=_("see https://mercurial-scm.org/wiki/MissingRequirement"
1032 1032 " for more information"))
1033 1033 return requirements
1034 1034
1035 1035 def writerequires(opener, requirements):
1036 1036 with opener('requires', 'w') as fp:
1037 1037 for r in sorted(requirements):
1038 1038 fp.write("%s\n" % r)
1039 1039
1040 1040 class filecachesubentry(object):
1041 1041 def __init__(self, path, stat):
1042 1042 self.path = path
1043 1043 self.cachestat = None
1044 1044 self._cacheable = None
1045 1045
1046 1046 if stat:
1047 1047 self.cachestat = filecachesubentry.stat(self.path)
1048 1048
1049 1049 if self.cachestat:
1050 1050 self._cacheable = self.cachestat.cacheable()
1051 1051 else:
1052 1052 # None means we don't know yet
1053 1053 self._cacheable = None
1054 1054
1055 1055 def refresh(self):
1056 1056 if self.cacheable():
1057 1057 self.cachestat = filecachesubentry.stat(self.path)
1058 1058
1059 1059 def cacheable(self):
1060 1060 if self._cacheable is not None:
1061 1061 return self._cacheable
1062 1062
1063 1063 # we don't know yet, assume it is for now
1064 1064 return True
1065 1065
1066 1066 def changed(self):
1067 1067 # no point in going further if we can't cache it
1068 1068 if not self.cacheable():
1069 1069 return True
1070 1070
1071 1071 newstat = filecachesubentry.stat(self.path)
1072 1072
1073 1073 # we may not know if it's cacheable yet, check again now
1074 1074 if newstat and self._cacheable is None:
1075 1075 self._cacheable = newstat.cacheable()
1076 1076
1077 1077 # check again
1078 1078 if not self._cacheable:
1079 1079 return True
1080 1080
1081 1081 if self.cachestat != newstat:
1082 1082 self.cachestat = newstat
1083 1083 return True
1084 1084 else:
1085 1085 return False
1086 1086
1087 1087 @staticmethod
1088 1088 def stat(path):
1089 1089 try:
1090 1090 return util.cachestat(path)
1091 1091 except OSError as e:
1092 1092 if e.errno != errno.ENOENT:
1093 1093 raise
1094 1094
1095 1095 class filecacheentry(object):
1096 1096 def __init__(self, paths, stat=True):
1097 1097 self._entries = []
1098 1098 for path in paths:
1099 1099 self._entries.append(filecachesubentry(path, stat))
1100 1100
1101 1101 def changed(self):
1102 1102 '''true if any entry has changed'''
1103 1103 for entry in self._entries:
1104 1104 if entry.changed():
1105 1105 return True
1106 1106 return False
1107 1107
1108 1108 def refresh(self):
1109 1109 for entry in self._entries:
1110 1110 entry.refresh()
1111 1111
1112 1112 class filecache(object):
1113 1113 '''A property like decorator that tracks files under .hg/ for updates.
1114 1114
1115 1115 Records stat info when called in _filecache.
1116 1116
1117 1117 On subsequent calls, compares old stat info with new info, and recreates the
1118 1118 object when any of the files changes, updating the new stat info in
1119 1119 _filecache.
1120 1120
1121 1121 Mercurial either atomically renames or appends to files under .hg,
1122 1122 so to ensure the cache is reliable we need the filesystem to be able
1123 1123 to tell us if a file has been replaced. If it can't, we fall back to
1124 1124 recreating the object on every call (essentially the same behavior as
1125 1125 propertycache).
1126 1126
1127 1127 '''
1128 1128 def __init__(self, *paths):
1129 1129 self.paths = paths
1130 1130
1131 1131 def join(self, obj, fname):
1132 1132 """Used to compute the runtime path of a cached file.
1133 1133
1134 1134 Users should subclass filecache and provide their own version of this
1135 1135 function to call the appropriate join function on 'obj' (an instance
1136 1136 of the class that its member function was decorated).
1137 1137 """
1138 1138 raise NotImplementedError
1139 1139
1140 1140 def __call__(self, func):
1141 1141 self.func = func
1142 1142 self.name = func.__name__.encode('ascii')
1143 1143 return self
1144 1144
1145 1145 def __get__(self, obj, type=None):
1146 1146 # if accessed on the class, return the descriptor itself.
1147 1147 if obj is None:
1148 1148 return self
1149 1149 # do we need to check if the file changed?
1150 1150 if self.name in obj.__dict__:
1151 1151 assert self.name in obj._filecache, self.name
1152 1152 return obj.__dict__[self.name]
1153 1153
1154 1154 entry = obj._filecache.get(self.name)
1155 1155
1156 1156 if entry:
1157 1157 if entry.changed():
1158 1158 entry.obj = self.func(obj)
1159 1159 else:
1160 1160 paths = [self.join(obj, path) for path in self.paths]
1161 1161
1162 1162 # We stat -before- creating the object so our cache doesn't lie if
1163 1163 # a writer modified the file between the time we read and stat
1164 1164 entry = filecacheentry(paths, True)
1165 1165 entry.obj = self.func(obj)
1166 1166
1167 1167 obj._filecache[self.name] = entry
1168 1168
1169 1169 obj.__dict__[self.name] = entry.obj
1170 1170 return entry.obj
1171 1171
1172 1172 def __set__(self, obj, value):
1173 1173 if self.name not in obj._filecache:
1174 1174 # we add an entry for the missing value because X in __dict__
1175 1175 # implies X in _filecache
1176 1176 paths = [self.join(obj, path) for path in self.paths]
1177 1177 ce = filecacheentry(paths, False)
1178 1178 obj._filecache[self.name] = ce
1179 1179 else:
1180 1180 ce = obj._filecache[self.name]
1181 1181
1182 1182 ce.obj = value # update cached copy
1183 1183 obj.__dict__[self.name] = value # update copy returned by obj.x
1184 1184
1185 1185 def __delete__(self, obj):
1186 1186 try:
1187 1187 del obj.__dict__[self.name]
1188 1188 except KeyError:
1189 1189 raise AttributeError(self.name)
1190 1190
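# Illustrative usage sketch (hypothetical, not part of the module above):
# filecache is meant to be subclassed so that join() knows where the tracked
# file lives; localrepo does essentially this for files under .hg/. A minimal
# subclass and decorated property could look like:
#
#     class storefilecache(scmutil.filecache):
#         def join(self, obj, fname):
#             return obj.vfs.join(fname)       # assumes obj carries a 'vfs'
#
#     class thing(object):
#         def __init__(self, vfs):
#             self.vfs = vfs
#             self._filecache = {}             # required by the descriptor
#         @storefilecache('state')
#         def state(self):
#             return self.vfs.tryread('state')
#
# thing(vfs).state is then recomputed only when the tracked file's stat
# information changes.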
1191 1191 def extdatasource(repo, source):
1192 1192 """Gather a map of rev -> value dict from the specified source
1193 1193
1194 1194 A source spec is treated as a URL, with a special case shell: type
1195 1195 for parsing the output from a shell command.
1196 1196
1197 1197 The data is parsed as a series of newline-separated records where
1198 1198 each record is a revision specifier optionally followed by a space
1199 1199 and a freeform string value. If the revision is known locally, it
1200 1200 is converted to a rev, otherwise the record is skipped.
1201 1201
1202 1202 Note that both key and value are treated as UTF-8 and converted to
1203 1203 the local encoding. This allows uniformity between local and
1204 1204 remote data sources.
1205 1205 """
1206 1206
1207 1207 spec = repo.ui.config("extdata", source)
1208 1208 if not spec:
1209 1209 raise error.Abort(_("unknown extdata source '%s'") % source)
1210 1210
1211 1211 data = {}
1212 1212 src = proc = None
1213 1213 try:
1214 1214 if spec.startswith("shell:"):
1215 1215 # external commands should be run relative to the repo root
1216 1216 cmd = spec[6:]
1217 1217 proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
1218 1218 close_fds=procutil.closefds,
1219 1219 stdout=subprocess.PIPE, cwd=repo.root)
1220 1220 src = proc.stdout
1221 1221 else:
1222 1222 # treat as a URL or file
1223 1223 src = url.open(repo.ui, spec)
1224 1224 for l in src:
1225 1225 if " " in l:
1226 1226 k, v = l.strip().split(" ", 1)
1227 1227 else:
1228 1228 k, v = l.strip(), ""
1229 1229
1230 1230 k = encoding.tolocal(k)
1231 1231 try:
1232 1232 data[revsingle(repo, k).rev()] = encoding.tolocal(v)
1233 1233 except (error.LookupError, error.RepoLookupError):
1234 1234 pass # we ignore data for nodes that don't exist locally
1235 1235 finally:
1236 1236 if proc:
1237 1237 proc.communicate()
1238 1238 if src:
1239 1239 src.close()
1240 1240 if proc and proc.returncode != 0:
1241 1241 raise error.Abort(_("extdata command '%s' failed: %s")
1242 1242 % (cmd, procutil.explainexit(proc.returncode)))
1243 1243
1244 1244 return data
1245 1245
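# Illustrative usage sketch (hypothetical, not part of the module above):
# extdatasource() reads "rev value" records from a configured source. With a
# configuration like:
#
#     [extdata]
#     bugzilla = shell:cat .hg/bugmap
#
# and a .hg/bugmap file whose lines look like '3f5e3d9f5c4e fixed-in-1.2',
# scmutil.extdatasource(repo, 'bugzilla') returns a {rev: value} dict,
# silently skipping records whose revision is unknown locally.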
1246 1246 def _locksub(repo, lock, envvar, cmd, environ=None, *args, **kwargs):
1247 1247 if lock is None:
1248 1248 raise error.LockInheritanceContractViolation(
1249 1249 'lock can only be inherited while held')
1250 1250 if environ is None:
1251 1251 environ = {}
1252 1252 with lock.inherit() as locker:
1253 1253 environ[envvar] = locker
1254 1254 return repo.ui.system(cmd, environ=environ, *args, **kwargs)
1255 1255
1256 1256 def wlocksub(repo, cmd, *args, **kwargs):
1257 1257 """run cmd as a subprocess that allows inheriting repo's wlock
1258 1258
1259 1259 This can only be called while the wlock is held. This takes all the
1260 1260 arguments that ui.system does, and returns the exit code of the
1261 1261 subprocess."""
1262 1262 return _locksub(repo, repo.currentwlock(), 'HG_WLOCK_LOCKER', cmd, *args,
1263 1263 **kwargs)
1264 1264
1265 1265 def gdinitconfig(ui):
1266 1266 """helper function to know if a repo should be created as general delta
1267 1267 """
1268 1268 # experimental config: format.generaldelta
1269 1269 return (ui.configbool('format', 'generaldelta')
1270 1270 or ui.configbool('format', 'usegeneraldelta'))
1271 1271
1272 1272 def gddeltaconfig(ui):
1273 1273 """helper function to know if incoming delta should be optimised
1274 1274 """
1275 1275 # experimental config: format.generaldelta
1276 1276 return ui.configbool('format', 'generaldelta')
1277 1277
1278 1278 class simplekeyvaluefile(object):
1279 1279 """A simple file with key=value lines
1280 1280
1281 1281 Keys must be alphanumeric and start with a letter; values must not
1282 1282 contain '\n' characters"""
1283 1283 firstlinekey = '__firstline'
1284 1284
1285 1285 def __init__(self, vfs, path, keys=None):
1286 1286 self.vfs = vfs
1287 1287 self.path = path
1288 1288
1289 1289 def read(self, firstlinenonkeyval=False):
1290 1290 """Read the contents of a simple key-value file
1291 1291
1292 1292 'firstlinenonkeyval' indicates whether the first line of the file should
1293 1293 be treated as a key-value pair or returned fully under the
1294 1294 __firstline key."""
1295 1295 lines = self.vfs.readlines(self.path)
1296 1296 d = {}
1297 1297 if firstlinenonkeyval:
1298 1298 if not lines:
1299 1299 e = _("empty simplekeyvalue file")
1300 1300 raise error.CorruptedState(e)
1301 1301 # we don't want to include '\n' in the __firstline
1302 1302 d[self.firstlinekey] = lines[0][:-1]
1303 1303 del lines[0]
1304 1304
1305 1305 try:
1306 1306 # the 'if line.strip()' part prevents us from failing on empty
1307 1307 # lines which only contain '\n' and therefore are not skipped
1308 1308 # by 'if line'
1309 1309 updatedict = dict(line[:-1].split('=', 1) for line in lines
1310 1310 if line.strip())
1311 1311 if self.firstlinekey in updatedict:
1312 1312 e = _("%r can't be used as a key")
1313 1313 raise error.CorruptedState(e % self.firstlinekey)
1314 1314 d.update(updatedict)
1315 1315 except ValueError as e:
1316 1316 raise error.CorruptedState(str(e))
1317 1317 return d
1318 1318
1319 1319 def write(self, data, firstline=None):
1320 1320 """Write key=>value mapping to a file
1321 1321 data is a dict. Keys must be alphanumeric and start with a letter.
1322 1322 Values must not contain newline characters.
1323 1323
1324 1324 If 'firstline' is not None, it is written to file before
1325 1325 everything else, as it is, not in a key=value form"""
1326 1326 lines = []
1327 1327 if firstline is not None:
1328 1328 lines.append('%s\n' % firstline)
1329 1329
1330 1330 for k, v in data.items():
1331 1331 if k == self.firstlinekey:
1332 1332 e = "key name '%s' is reserved" % self.firstlinekey
1333 1333 raise error.ProgrammingError(e)
1334 1334 if not k[0:1].isalpha():
1335 1335 e = "keys must start with a letter in a key-value file"
1336 1336 raise error.ProgrammingError(e)
1337 1337 if not k.isalnum():
1338 1338 e = "invalid key name in a simple key-value file"
1339 1339 raise error.ProgrammingError(e)
1340 1340 if '\n' in v:
1341 1341 e = "invalid value in a simple key-value file"
1342 1342 raise error.ProgrammingError(e)
1343 1343 lines.append("%s=%s\n" % (k, v))
1344 1344 with self.vfs(self.path, mode='wb', atomictemp=True) as fp:
1345 1345 fp.write(''.join(lines))
1346 1346
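# Illustrative usage sketch (hypothetical, not part of the module above):
# a round trip through simplekeyvaluefile, assuming repo.vfs and a made-up
# state file name:
#
#     skv = scmutil.simplekeyvaluefile(repo.vfs, 'mystate')
#     skv.write({'version': '1', 'node': 'deadbeef'}, firstline='header-line')
#     data = skv.read(firstlinenonkeyval=True)
#     # data == {'__firstline': 'header-line', 'version': '1', 'node': 'deadbeef'}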
1347 1347 _reportobsoletedsource = [
1348 1348 'debugobsolete',
1349 1349 'pull',
1350 1350 'push',
1351 1351 'serve',
1352 1352 'unbundle',
1353 1353 ]
1354 1354
1355 1355 _reportnewcssource = [
1356 1356 'pull',
1357 1357 'unbundle',
1358 1358 ]
1359 1359
1360 1360 def prefetchfiles(repo, revs, match):
1361 1361 """Invokes the registered file prefetch functions, allowing extensions to
1362 1362 ensure the corresponding files are available locally, before the command
1363 1363 uses them."""
1364 1364 if match:
1365 1365 # The command itself will complain about files that don't exist, so
1366 1366 # don't duplicate the message.
1367 1367 match = matchmod.badmatch(match, lambda fn, msg: None)
1368 1368 else:
1369 1369 match = matchall(repo)
1370 1370
1371 1371 fileprefetchhooks(repo, revs, match)
1372 1372
1373 1373 # a list of (repo, revs, match) prefetch functions
1374 1374 fileprefetchhooks = util.hooks()
1375 1375
1376 1376 # A marker that tells the evolve extension to suppress its own reporting
1377 1377 _reportstroubledchangesets = True
1378 1378
1379 1379 def registersummarycallback(repo, otr, txnname=''):
1380 1380 """register a callback to issue a summary after the transaction is closed
1381 1381 """
1382 1382 def txmatch(sources):
1383 1383 return any(txnname.startswith(source) for source in sources)
1384 1384
1385 1385 categories = []
1386 1386
1387 1387 def reportsummary(func):
1388 1388 """decorator for report callbacks."""
1389 1389 # The repoview life cycle is shorter than the one of the actual
1390 1390 # underlying repository. So the filtered object can die before the
1391 1391 # weakref is used leading to troubles. We keep a reference to the
1392 1392 # unfiltered object and restore the filtering when retrieving the
1393 1393 # repository through the weakref.
1394 1394 filtername = repo.filtername
1395 1395 reporef = weakref.ref(repo.unfiltered())
1396 1396 def wrapped(tr):
1397 1397 repo = reporef()
1398 1398 if filtername:
1399 1399 repo = repo.filtered(filtername)
1400 1400 func(repo, tr)
1401 1401 newcat = '%02i-txnreport' % len(categories)
1402 1402 otr.addpostclose(newcat, wrapped)
1403 1403 categories.append(newcat)
1404 1404 return wrapped
1405 1405
1406 1406 if txmatch(_reportobsoletedsource):
1407 1407 @reportsummary
1408 1408 def reportobsoleted(repo, tr):
1409 1409 obsoleted = obsutil.getobsoleted(repo, tr)
1410 1410 if obsoleted:
1411 1411 repo.ui.status(_('obsoleted %i changesets\n')
1412 1412 % len(obsoleted))
1413 1413
1414 1414 if (obsolete.isenabled(repo, obsolete.createmarkersopt) and
1415 1415 repo.ui.configbool('experimental', 'evolution.report-instabilities')):
1416 1416 instabilitytypes = [
1417 1417 ('orphan', 'orphan'),
1418 1418 ('phase-divergent', 'phasedivergent'),
1419 1419 ('content-divergent', 'contentdivergent'),
1420 1420 ]
1421 1421
1422 1422 def getinstabilitycounts(repo):
1423 1423 filtered = repo.changelog.filteredrevs
1424 1424 counts = {}
1425 1425 for instability, revset in instabilitytypes:
1426 1426 counts[instability] = len(set(obsolete.getrevs(repo, revset)) -
1427 1427 filtered)
1428 1428 return counts
1429 1429
1430 1430 oldinstabilitycounts = getinstabilitycounts(repo)
1431 1431 @reportsummary
1432 1432 def reportnewinstabilities(repo, tr):
1433 1433 newinstabilitycounts = getinstabilitycounts(repo)
1434 1434 for instability, revset in instabilitytypes:
1435 1435 delta = (newinstabilitycounts[instability] -
1436 1436 oldinstabilitycounts[instability])
1437 1437 if delta > 0:
1438 1438 repo.ui.warn(_('%i new %s changesets\n') %
1439 1439 (delta, instability))
1440 1440
1441 1441 if txmatch(_reportnewcssource):
1442 1442 @reportsummary
1443 1443 def reportnewcs(repo, tr):
1444 1444 """Report the range of new revisions pulled/unbundled."""
1445 1445 newrevs = tr.changes.get('revs', xrange(0, 0))
1446 1446 if not newrevs:
1447 1447 return
1448 1448
1449 1449 # Compute the bounds of new revisions' range, excluding obsoletes.
1450 1450 unfi = repo.unfiltered()
1451 1451 revs = unfi.revs('%ld and not obsolete()', newrevs)
1452 1452 if not revs:
1453 1453 # Got only obsoletes.
1454 1454 return
1455 1455 minrev, maxrev = repo[revs.min()], repo[revs.max()]
1456 1456
1457 1457 if minrev == maxrev:
1458 1458 revrange = minrev
1459 1459 else:
1460 1460 revrange = '%s:%s' % (minrev, maxrev)
1461 1461 repo.ui.status(_('new changesets %s\n') % revrange)
1462 1462
1463 1463 def nodesummaries(repo, nodes, maxnumnodes=4):
1464 1464 if len(nodes) <= maxnumnodes or repo.ui.verbose:
1465 1465 return ' '.join(short(h) for h in nodes)
1466 1466 first = ' '.join(short(h) for h in nodes[:maxnumnodes])
1467 1467 return _("%s and %d others") % (first, len(nodes) - maxnumnodes)
1468 1468
1469 1469 def enforcesinglehead(repo, tr, desc):
1470 1470 """check that no named branch has multiple heads"""
1471 1471 if desc in ('strip', 'repair'):
1472 1472 # skip the logic during strip
1473 1473 return
1474 1474 visible = repo.filtered('visible')
1475 1475 # possible improvement: we could restrict the check to affected branch
1476 1476 for name, heads in visible.branchmap().iteritems():
1477 1477 if len(heads) > 1:
1478 1478 msg = _('rejecting multiple heads on branch "%s"')
1479 1479 msg %= name
1480 1480 hint = _('%d heads: %s')
1481 1481 hint %= (len(heads), nodesummaries(repo, heads))
1482 1482 raise error.Abort(msg, hint=hint)
1483 1483
1484 1484 def wrapconvertsink(sink):
1485 1485 """Allow extensions to wrap the sink returned by convcmd.convertsink()
1486 1486 before it is used, whether or not the convert extension was formally loaded.
1487 1487 """
1488 1488 return sink
1489 1489
1490 1490 def unhidehashlikerevs(repo, specs, hiddentype):
1491 1491 """parse the user specs and unhide changesets whose hash or revision number
1492 1492 is passed.
1493 1493
1494 1494 hiddentype can be: 1) 'warn': warn while unhiding changesets
1495 1495 2) 'nowarn': don't warn while unhiding changesets
1496 1496
1497 1497 returns a repo object with the required changesets unhidden
1498 1498 """
1499 1499 if not repo.filtername or not repo.ui.configbool('experimental',
1500 1500 'directaccess'):
1501 1501 return repo
1502 1502
1503 1503 if repo.filtername not in ('visible', 'visible-hidden'):
1504 1504 return repo
1505 1505
1506 1506 symbols = set()
1507 1507 for spec in specs:
1508 1508 try:
1509 1509 tree = revsetlang.parse(spec)
1510 1510 except error.ParseError: # will be reported by scmutil.revrange()
1511 1511 continue
1512 1512
1513 1513 symbols.update(revsetlang.gethashlikesymbols(tree))
1514 1514
1515 1515 if not symbols:
1516 1516 return repo
1517 1517
1518 1518 revs = _getrevsfromsymbols(repo, symbols)
1519 1519
1520 1520 if not revs:
1521 1521 return repo
1522 1522
1523 1523 if hiddentype == 'warn':
1524 1524 unfi = repo.unfiltered()
1525 1525 revstr = ", ".join([pycompat.bytestr(unfi[l]) for l in revs])
1526 1526 repo.ui.warn(_("warning: accessing hidden changesets for write "
1527 1527 "operation: %s\n") % revstr)
1528 1528
1529 1529 # we have to use a new filtername to separate branch/tags caches until we can
1530 1530 # disable these caches when revisions are dynamically pinned.
1531 1531 return repo.filtered('visible-hidden', revs)
1532 1532
1533 1533 def _getrevsfromsymbols(repo, symbols):
1534 1534 """parse the list of symbols and returns a set of revision numbers of hidden
1535 1535 changesets present in symbols"""
1536 1536 revs = set()
1537 1537 unfi = repo.unfiltered()
1538 1538 unficl = unfi.changelog
1539 1539 cl = repo.changelog
1540 1540 tiprev = len(unficl)
1541 1541 pmatch = unficl._partialmatch
1542 1542 allowrevnums = repo.ui.configbool('experimental', 'directaccess.revnums')
1543 1543 for s in symbols:
1544 1544 try:
1545 1545 n = int(s)
1546 1546 if n <= tiprev:
1547 1547 if not allowrevnums:
1548 1548 continue
1549 1549 else:
1550 1550 if n not in cl:
1551 1551 revs.add(n)
1552 1552 continue
1553 1553 except ValueError:
1554 1554 pass
1555 1555
1556 1556 try:
1557 1557 s = pmatch(s)
1558 1558 except (error.LookupError, error.WdirUnsupported):
1559 1559 s = None
1560 1560
1561 1561 if s is not None:
1562 1562 rev = unficl.rev(s)
1563 1563 if rev not in cl:
1564 1564 revs.add(rev)
1565 1565
1566 1566 return revs