revlog: map rev(wdirid) to WdirUnsupported exception...
Yuya Nishihara - r32657:7b17f9de default
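This change makes revlog.rev() report the working-directory pseudo node as
error.WdirUnsupported instead of falling through to the generic "no node"
LookupError, mirroring what parentrevs() and node() already do for wdirrev.
A minimal sketch of how a caller observes the new behaviour, assuming an
existing revlog instance named rlog (illustration only, not part of the
change):

    from mercurial.node import wdirid
    from mercurial import error

    try:
        rlog.rev(wdirid)        # previously raised LookupError('no node')
    except error.WdirUnsupported:
        pass                    # now reported as the dedicated exception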
@@ -1,2167 +1,2172 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import errno
18 18 import hashlib
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullid,
28 28 nullrev,
29 wdirid,
29 30 wdirrev,
30 31 )
31 32 from .i18n import _
32 33 from . import (
33 34 ancestor,
34 35 error,
35 36 mdiff,
36 37 policy,
37 38 pycompat,
38 39 templatefilters,
39 40 util,
40 41 )
41 42
42 43 parsers = policy.importmod(r'parsers')
43 44
44 45 _pack = struct.pack
45 46 _unpack = struct.unpack
46 47 # Aliased for performance.
47 48 _zlibdecompress = zlib.decompress
48 49
49 50 # revlog header flags
50 51 REVLOGV0 = 0
51 52 REVLOGV1 = 1
52 53 FLAG_INLINE_DATA = (1 << 16)
53 54 FLAG_GENERALDELTA = (1 << 17)
54 55 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
55 56 REVLOG_DEFAULT_FORMAT = REVLOGV1
56 57 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
57 58 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
58 59
59 60 # revlog index flags
60 61 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
61 62 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
62 63 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
63 64 REVIDX_DEFAULT_FLAGS = 0
64 65 # stable order in which flags need to be processed and their processors applied
65 66 REVIDX_FLAGS_ORDER = [
66 67 REVIDX_ISCENSORED,
67 68 REVIDX_ELLIPSIS,
68 69 REVIDX_EXTSTORED,
69 70 ]
70 71 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
71 72
72 73 # max size of revlog with inline data
73 74 _maxinline = 131072
74 75 _chunksize = 1048576
75 76
76 77 RevlogError = error.RevlogError
77 78 LookupError = error.LookupError
78 79 CensoredNodeError = error.CensoredNodeError
79 80 ProgrammingError = error.ProgrammingError
80 81
81 82 # Store flag processors (cf. 'addflagprocessor()' to register)
82 83 _flagprocessors = {
83 84 REVIDX_ISCENSORED: None,
84 85 }
85 86
86 87 def addflagprocessor(flag, processor):
87 88 """Register a flag processor on a revision data flag.
88 89
89 90 Invariant:
90 91 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
91 92 - Only one flag processor can be registered on a specific flag.
92 93 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
93 94 following signatures:
94 95 - (read) f(self, rawtext) -> text, bool
95 96 - (write) f(self, text) -> rawtext, bool
96 97 - (raw) f(self, rawtext) -> bool
97 98 "text" is presented to the user. "rawtext" is stored in revlog data, not
98 99 directly visible to the user.
99 100 The boolean returned by these transforms is used to determine whether
100 101 the returned text can be used for hash integrity checking. For example,
101 102 if "write" returns False, then "text" is used to generate the hash. If
102 103 "write" returns True, that basically means the "rawtext" returned by "write"
103 104 should be used to generate the hash. Usually, "write" and "read" return
104 105 different booleans, and "raw" returns the same boolean as "write".
105 106
106 107 Note: The 'raw' transform is used for changegroup generation and in some
107 108 debug commands. In this case the transform only indicates whether the
108 109 contents can be used for hash integrity checks.
109 110 """
110 111 if not flag & REVIDX_KNOWN_FLAGS:
111 112 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
112 113 raise ProgrammingError(msg)
113 114 if flag not in REVIDX_FLAGS_ORDER:
114 115 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
115 116 raise ProgrammingError(msg)
116 117 if flag in _flagprocessors:
117 118 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
118 119 raise error.Abort(msg)
119 120 _flagprocessors[flag] = processor
120 121
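# --- illustrative sketch (editor's note, not part of this changeset) -------
# Registering a flag processor per the contract documented above: a 3-tuple
# of (read, write, raw) callables. REVIDX_EXTSTORED is reused here purely as
# an example flag; a real extension supplies its own transforms for one of
# the flags defined in REVIDX_FLAGS_ORDER.
#
#   def _readext(self, rawtext):
#       return rawtext, True          # text handed to the user, hashable
#   def _writeext(self, text):
#       return text, True             # rawtext stored in the revlog, hashable
#   def _rawext(self, rawtext):
#       return True                   # rawtext usable for hash verification
#
#   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
# ---------------------------------------------------------------------------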
121 122 def getoffset(q):
122 123 return int(q >> 16)
123 124
124 125 def gettype(q):
125 126 return int(q & 0xFFFF)
126 127
127 128 def offset_type(offset, type):
128 129 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
129 130 raise ValueError('unknown revlog index flags')
130 131 return int(int(offset) << 16 | type)
131 132
132 133 _nullhash = hashlib.sha1(nullid)
133 134
134 135 def hash(text, p1, p2):
135 136 """generate a hash from the given text and its parent hashes
136 137
137 138 This hash combines both the current file contents and its history
138 139 in a manner that makes it easy to distinguish nodes with the same
139 140 content in the revision graph.
140 141 """
141 142 # As of now, if one of the parent nodes is null, p2 is null
142 143 if p2 == nullid:
143 144 # deep copy of a hash is faster than creating one
144 145 s = _nullhash.copy()
145 146 s.update(p1)
146 147 else:
147 148 # none of the parent nodes are nullid
148 149 l = [p1, p2]
149 150 l.sort()
150 151 s = hashlib.sha1(l[0])
151 152 s.update(l[1])
152 153 s.update(text)
153 154 return s.digest()
154 155
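# --- illustrative sketch (editor's note, not part of this changeset) -------
# The node hash above is equivalent to sha1(min(p1, p2) + max(p1, p2) + text);
# the nullid branch is only an optimization that reuses a precomputed
# sha1(nullid) state instead of hashing the null parent again.
#
#   expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
#   assert hash(text, p1, p2) == expected
# ---------------------------------------------------------------------------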
155 156 # index v0:
156 157 # 4 bytes: offset
157 158 # 4 bytes: compressed length
158 159 # 4 bytes: base rev
159 160 # 4 bytes: link rev
160 161 # 20 bytes: parent 1 nodeid
161 162 # 20 bytes: parent 2 nodeid
162 163 # 20 bytes: nodeid
163 164 indexformatv0 = ">4l20s20s20s"
164 165
165 166 class revlogoldio(object):
166 167 def __init__(self):
167 168 self.size = struct.calcsize(indexformatv0)
168 169
169 170 def parseindex(self, data, inline):
170 171 s = self.size
171 172 index = []
172 173 nodemap = {nullid: nullrev}
173 174 n = off = 0
174 175 l = len(data)
175 176 while off + s <= l:
176 177 cur = data[off:off + s]
177 178 off += s
178 179 e = _unpack(indexformatv0, cur)
179 180 # transform to revlogv1 format
180 181 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
181 182 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
182 183 index.append(e2)
183 184 nodemap[e[6]] = n
184 185 n += 1
185 186
186 187 # add the magic null revision at -1
187 188 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
188 189
189 190 return index, nodemap, None
190 191
191 192 def packentry(self, entry, node, version, rev):
192 193 if gettype(entry[0]):
193 194 raise RevlogError(_('index entry flags need revlog version 1'))
194 195 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
195 196 node(entry[5]), node(entry[6]), entry[7])
196 197 return _pack(indexformatv0, *e2)
197 198
198 199 # index ng:
199 200 # 6 bytes: offset
200 201 # 2 bytes: flags
201 202 # 4 bytes: compressed length
202 203 # 4 bytes: uncompressed length
203 204 # 4 bytes: base rev
204 205 # 4 bytes: link rev
205 206 # 4 bytes: parent 1 rev
206 207 # 4 bytes: parent 2 rev
207 208 # 32 bytes: nodeid
208 209 indexformatng = ">Qiiiiii20s12x"
209 210 versionformat = ">I"
210 211
211 212 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
212 213 # signed integer)
213 214 _maxentrysize = 0x7fffffff
214 215
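# --- illustrative sketch (editor's note, not part of this changeset) -------
# Decoding one 64-byte v1 index entry by hand, mirroring what parseindex
# obtains from the C parser. 'entrydata' is assumed to be 64 bytes taken
# from a non-inline index at a revision > 0 (revision 0 stores the version
# header in its first 4 bytes, see revlogio.packentry below).
#
#   assert struct.calcsize(indexformatng) == 64
#   e = _unpack(indexformatng, entrydata)
#   offset, flags = getoffset(e[0]), gettype(e[0])
#   complen, rawlen, baserev, linkrev, p1rev, p2rev = e[1:7]
#   nodeid = e[7]                     # 20-byte binary hash, 12x pads to 32
# ---------------------------------------------------------------------------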
215 216 class revlogio(object):
216 217 def __init__(self):
217 218 self.size = struct.calcsize(indexformatng)
218 219
219 220 def parseindex(self, data, inline):
220 221 # call the C implementation to parse the index data
221 222 index, cache = parsers.parse_index2(data, inline)
222 223 return index, getattr(index, 'nodemap', None), cache
223 224
224 225 def packentry(self, entry, node, version, rev):
225 226 p = _pack(indexformatng, *entry)
226 227 if rev == 0:
227 228 p = _pack(versionformat, version) + p[4:]
228 229 return p
229 230
230 231 class revlog(object):
231 232 """
232 233 the underlying revision storage object
233 234
234 235 A revlog consists of two parts, an index and the revision data.
235 236
236 237 The index is a file with a fixed record size containing
237 238 information on each revision, including its nodeid (hash), the
238 239 nodeids of its parents, the position and offset of its data within
239 240 the data file, and the revision it's based on. Finally, each entry
240 241 contains a linkrev entry that can serve as a pointer to external
241 242 data.
242 243
243 244 The revision data itself is a linear collection of data chunks.
244 245 Each chunk represents a revision and is usually represented as a
245 246 delta against the previous chunk. To bound lookup time, runs of
246 247 deltas are limited to about 2 times the length of the original
247 248 version data. This makes retrieval of a version proportional to
248 249 its size, or O(1) relative to the number of revisions.
249 250
250 251 Both pieces of the revlog are written to in an append-only
251 252 fashion, which means we never need to rewrite a file to insert or
252 253 remove data, and can use some simple techniques to avoid the need
253 254 for locking while reading.
254 255
255 256 If checkambig, indexfile is opened with checkambig=True at
256 257 writing, to avoid file stat ambiguity.
257 258 """
258 259 def __init__(self, opener, indexfile, datafile=None, checkambig=False):
259 260 """
260 261 create a revlog object
261 262
262 263 opener is a function that abstracts the file opening operation
263 264 and can be used to implement COW semantics or the like.
264 265 """
265 266 self.indexfile = indexfile
266 267 self.datafile = datafile or (indexfile[:-2] + ".d")
267 268 self.opener = opener
268 269 # When True, indexfile is opened with checkambig=True at writing, to
269 270 # avoid file stat ambiguity.
270 271 self._checkambig = checkambig
271 272 # 3-tuple of (node, rev, text) for a raw revision.
272 273 self._cache = None
273 274 # Maps rev to chain base rev.
274 275 self._chainbasecache = util.lrucachedict(100)
275 276 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
276 277 self._chunkcache = (0, '')
277 278 # How much data to read and cache into the raw revlog data cache.
278 279 self._chunkcachesize = 65536
279 280 self._maxchainlen = None
280 281 self._aggressivemergedeltas = False
281 282 self.index = []
282 283 # Mapping of partial identifiers to full nodes.
283 284 self._pcache = {}
284 285 # Mapping of revision integer to full node.
285 286 self._nodecache = {nullid: nullrev}
286 287 self._nodepos = None
287 288 self._compengine = 'zlib'
288 289
289 290 v = REVLOG_DEFAULT_VERSION
290 291 opts = getattr(opener, 'options', None)
291 292 if opts is not None:
292 293 if 'revlogv1' in opts:
293 294 if 'generaldelta' in opts:
294 295 v |= FLAG_GENERALDELTA
295 296 else:
296 297 v = 0
297 298 if 'chunkcachesize' in opts:
298 299 self._chunkcachesize = opts['chunkcachesize']
299 300 if 'maxchainlen' in opts:
300 301 self._maxchainlen = opts['maxchainlen']
301 302 if 'aggressivemergedeltas' in opts:
302 303 self._aggressivemergedeltas = opts['aggressivemergedeltas']
303 304 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
304 305 if 'compengine' in opts:
305 306 self._compengine = opts['compengine']
306 307
307 308 if self._chunkcachesize <= 0:
308 309 raise RevlogError(_('revlog chunk cache size %r is not greater '
309 310 'than 0') % self._chunkcachesize)
310 311 elif self._chunkcachesize & (self._chunkcachesize - 1):
311 312 raise RevlogError(_('revlog chunk cache size %r is not a power '
312 313 'of 2') % self._chunkcachesize)
313 314
314 315 indexdata = ''
315 316 self._initempty = True
316 317 try:
317 318 f = self.opener(self.indexfile)
318 319 indexdata = f.read()
319 320 f.close()
320 321 if len(indexdata) > 0:
321 322 v = struct.unpack(versionformat, indexdata[:4])[0]
322 323 self._initempty = False
323 324 except IOError as inst:
324 325 if inst.errno != errno.ENOENT:
325 326 raise
326 327
327 328 self.version = v
328 329 self._inline = v & FLAG_INLINE_DATA
329 330 self._generaldelta = v & FLAG_GENERALDELTA
330 331 flags = v & ~0xFFFF
331 332 fmt = v & 0xFFFF
332 333 if fmt == REVLOGV0:
333 334 if flags:
334 335 raise RevlogError(_('unknown flags (%#04x) in version %d '
335 336 'revlog %s') %
336 337 (flags >> 16, fmt, self.indexfile))
337 338 elif fmt == REVLOGV1:
338 339 if flags & ~REVLOGV1_FLAGS:
339 340 raise RevlogError(_('unknown flags (%#04x) in version %d '
340 341 'revlog %s') %
341 342 (flags >> 16, fmt, self.indexfile))
342 343 else:
343 344 raise RevlogError(_('unknown version (%d) in revlog %s') %
344 345 (fmt, self.indexfile))
345 346
346 347 self.storedeltachains = True
347 348
348 349 self._io = revlogio()
349 350 if self.version == REVLOGV0:
350 351 self._io = revlogoldio()
351 352 try:
352 353 d = self._io.parseindex(indexdata, self._inline)
353 354 except (ValueError, IndexError):
354 355 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
355 356 self.index, nodemap, self._chunkcache = d
356 357 if nodemap is not None:
357 358 self.nodemap = self._nodecache = nodemap
358 359 if not self._chunkcache:
359 360 self._chunkclear()
360 361 # revnum -> (chain-length, sum-delta-length)
361 362 self._chaininfocache = {}
362 363 # revlog header -> revlog compressor
363 364 self._decompressors = {}
364 365
365 366 @util.propertycache
366 367 def _compressor(self):
367 368 return util.compengines[self._compengine].revlogcompressor()
368 369
369 370 def tip(self):
370 371 return self.node(len(self.index) - 2)
371 372 def __contains__(self, rev):
372 373 return 0 <= rev < len(self)
373 374 def __len__(self):
374 375 return len(self.index) - 1
375 376 def __iter__(self):
376 377 return iter(xrange(len(self)))
377 378 def revs(self, start=0, stop=None):
378 379 """iterate over all rev in this revlog (from start to stop)"""
379 380 step = 1
380 381 if stop is not None:
381 382 if start > stop:
382 383 step = -1
383 384 stop += step
384 385 else:
385 386 stop = len(self)
386 387 return xrange(start, stop, step)
387 388
388 389 @util.propertycache
389 390 def nodemap(self):
390 391 self.rev(self.node(0))
391 392 return self._nodecache
392 393
393 394 def hasnode(self, node):
394 395 try:
395 396 self.rev(node)
396 397 return True
397 398 except KeyError:
398 399 return False
399 400
400 401 def clearcaches(self):
401 402 self._cache = None
402 403 self._chainbasecache.clear()
403 404 self._chunkcache = (0, '')
404 405 self._pcache = {}
405 406
406 407 try:
407 408 self._nodecache.clearcaches()
408 409 except AttributeError:
409 410 self._nodecache = {nullid: nullrev}
410 411 self._nodepos = None
411 412
412 413 def rev(self, node):
413 414 try:
414 415 return self._nodecache[node]
415 416 except TypeError:
416 417 raise
417 418 except RevlogError:
418 419 # parsers.c radix tree lookup failed
420 if node == wdirid:
421 raise error.WdirUnsupported
419 422 raise LookupError(node, self.indexfile, _('no node'))
420 423 except KeyError:
421 424 # pure python cache lookup failed
422 425 n = self._nodecache
423 426 i = self.index
424 427 p = self._nodepos
425 428 if p is None:
426 429 p = len(i) - 2
427 430 for r in xrange(p, -1, -1):
428 431 v = i[r][7]
429 432 n[v] = r
430 433 if v == node:
431 434 self._nodepos = r - 1
432 435 return r
436 if node == wdirid:
437 raise error.WdirUnsupported
433 438 raise LookupError(node, self.indexfile, _('no node'))
434 439
435 440 # Accessors for index entries.
436 441
437 442 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
438 443 # are flags.
439 444 def start(self, rev):
440 445 return int(self.index[rev][0] >> 16)
441 446
442 447 def flags(self, rev):
443 448 return self.index[rev][0] & 0xFFFF
444 449
445 450 def length(self, rev):
446 451 return self.index[rev][1]
447 452
448 453 def rawsize(self, rev):
449 454 """return the length of the uncompressed text for a given revision"""
450 455 l = self.index[rev][2]
451 456 if l >= 0:
452 457 return l
453 458
454 459 t = self.revision(rev, raw=True)
455 460 return len(t)
456 461
457 462 def size(self, rev):
458 463 """length of non-raw text (processed by a "read" flag processor)"""
459 464 # fast path: if no "read" flag processor could change the content,
460 465 # size is rawsize. note: ELLIPSIS is known to not change the content.
461 466 flags = self.flags(rev)
462 467 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
463 468 return self.rawsize(rev)
464 469
465 470 return len(self.revision(rev, raw=False))
466 471
467 472 def chainbase(self, rev):
468 473 base = self._chainbasecache.get(rev)
469 474 if base is not None:
470 475 return base
471 476
472 477 index = self.index
473 478 base = index[rev][3]
474 479 while base != rev:
475 480 rev = base
476 481 base = index[rev][3]
477 482
478 483 self._chainbasecache[rev] = base
479 484 return base
480 485
481 486 def linkrev(self, rev):
482 487 return self.index[rev][4]
483 488
484 489 def parentrevs(self, rev):
485 490 try:
486 491 return self.index[rev][5:7]
487 492 except IndexError:
488 493 if rev == wdirrev:
489 494 raise error.WdirUnsupported
490 495 raise
491 496
492 497 def node(self, rev):
493 498 try:
494 499 return self.index[rev][7]
495 500 except IndexError:
496 501 if rev == wdirrev:
497 502 raise error.WdirUnsupported
498 503 raise
499 504
500 505 # Derived from index values.
501 506
502 507 def end(self, rev):
503 508 return self.start(rev) + self.length(rev)
504 509
505 510 def parents(self, node):
506 511 i = self.index
507 512 d = i[self.rev(node)]
508 513 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
509 514
510 515 def chainlen(self, rev):
511 516 return self._chaininfo(rev)[0]
512 517
513 518 def _chaininfo(self, rev):
514 519 chaininfocache = self._chaininfocache
515 520 if rev in chaininfocache:
516 521 return chaininfocache[rev]
517 522 index = self.index
518 523 generaldelta = self._generaldelta
519 524 iterrev = rev
520 525 e = index[iterrev]
521 526 clen = 0
522 527 compresseddeltalen = 0
523 528 while iterrev != e[3]:
524 529 clen += 1
525 530 compresseddeltalen += e[1]
526 531 if generaldelta:
527 532 iterrev = e[3]
528 533 else:
529 534 iterrev -= 1
530 535 if iterrev in chaininfocache:
531 536 t = chaininfocache[iterrev]
532 537 clen += t[0]
533 538 compresseddeltalen += t[1]
534 539 break
535 540 e = index[iterrev]
536 541 else:
537 542 # Add text length of base since decompressing that also takes
538 543 # work. For cache hits the length is already included.
539 544 compresseddeltalen += e[1]
540 545 r = (clen, compresseddeltalen)
541 546 chaininfocache[rev] = r
542 547 return r
543 548
544 549 def _deltachain(self, rev, stoprev=None):
545 550 """Obtain the delta chain for a revision.
546 551
547 552 ``stoprev`` specifies a revision to stop at. If not specified, we
548 553 stop at the base of the chain.
549 554
550 555 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
551 556 revs in ascending order and ``stopped`` is a bool indicating whether
552 557 ``stoprev`` was hit.
553 558 """
554 559 chain = []
555 560
556 561 # Alias to prevent attribute lookup in tight loop.
557 562 index = self.index
558 563 generaldelta = self._generaldelta
559 564
560 565 iterrev = rev
561 566 e = index[iterrev]
562 567 while iterrev != e[3] and iterrev != stoprev:
563 568 chain.append(iterrev)
564 569 if generaldelta:
565 570 iterrev = e[3]
566 571 else:
567 572 iterrev -= 1
568 573 e = index[iterrev]
569 574
570 575 if iterrev == stoprev:
571 576 stopped = True
572 577 else:
573 578 chain.append(iterrev)
574 579 stopped = False
575 580
576 581 chain.reverse()
577 582 return chain, stopped
578 583
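# --- illustrative sketch (editor's note, not part of this changeset) -------
# Reconstructing a revision means "take the full text at the chain base, then
# apply every delta in the chain", which is exactly what revision() does
# further down. Assuming an existing revlog instance 'rlog':
#
#   chain, stopped = rlog._deltachain(rev)   # e.g. ([2, 4, 5], False) with
#                                             # generaldelta, [2, 3, 4, 5]
#                                             # with the legacy layout
#   bins = rlog._chunks(chain)                # decompressed chunks
#   text = mdiff.patches(bytes(bins[0]), bins[1:])
# ---------------------------------------------------------------------------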
579 584 def ancestors(self, revs, stoprev=0, inclusive=False):
580 585 """Generate the ancestors of 'revs' in reverse topological order.
581 586 Does not generate revs lower than stoprev.
582 587
583 588 See the documentation for ancestor.lazyancestors for more details."""
584 589
585 590 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
586 591 inclusive=inclusive)
587 592
588 593 def descendants(self, revs):
589 594 """Generate the descendants of 'revs' in revision order.
590 595
591 596 Yield a sequence of revision numbers starting with a child of
592 597 some rev in revs, i.e., each revision is *not* considered a
593 598 descendant of itself. Results are ordered by revision number (a
594 599 topological sort)."""
595 600 first = min(revs)
596 601 if first == nullrev:
597 602 for i in self:
598 603 yield i
599 604 return
600 605
601 606 seen = set(revs)
602 607 for i in self.revs(start=first + 1):
603 608 for x in self.parentrevs(i):
604 609 if x != nullrev and x in seen:
605 610 seen.add(i)
606 611 yield i
607 612 break
608 613
609 614 def findcommonmissing(self, common=None, heads=None):
610 615 """Return a tuple of the ancestors of common and the ancestors of heads
611 616 that are not ancestors of common. In revset terminology, we return the
612 617 tuple:
613 618
614 619 ::common, (::heads) - (::common)
615 620
616 621 The list is sorted by revision number, meaning it is
617 622 topologically sorted.
618 623
619 624 'heads' and 'common' are both lists of node IDs. If heads is
620 625 not supplied, uses all of the revlog's heads. If common is not
621 626 supplied, uses nullid."""
622 627 if common is None:
623 628 common = [nullid]
624 629 if heads is None:
625 630 heads = self.heads()
626 631
627 632 common = [self.rev(n) for n in common]
628 633 heads = [self.rev(n) for n in heads]
629 634
630 635 # we want the ancestors, but inclusive
631 636 class lazyset(object):
632 637 def __init__(self, lazyvalues):
633 638 self.addedvalues = set()
634 639 self.lazyvalues = lazyvalues
635 640
636 641 def __contains__(self, value):
637 642 return value in self.addedvalues or value in self.lazyvalues
638 643
639 644 def __iter__(self):
640 645 added = self.addedvalues
641 646 for r in added:
642 647 yield r
643 648 for r in self.lazyvalues:
644 649 if not r in added:
645 650 yield r
646 651
647 652 def add(self, value):
648 653 self.addedvalues.add(value)
649 654
650 655 def update(self, values):
651 656 self.addedvalues.update(values)
652 657
653 658 has = lazyset(self.ancestors(common))
654 659 has.add(nullrev)
655 660 has.update(common)
656 661
657 662 # take all ancestors from heads that aren't in has
658 663 missing = set()
659 664 visit = collections.deque(r for r in heads if r not in has)
660 665 while visit:
661 666 r = visit.popleft()
662 667 if r in missing:
663 668 continue
664 669 else:
665 670 missing.add(r)
666 671 for p in self.parentrevs(r):
667 672 if p not in has:
668 673 visit.append(p)
669 674 missing = list(missing)
670 675 missing.sort()
671 676 return has, [self.node(miss) for miss in missing]
672 677
673 678 def incrementalmissingrevs(self, common=None):
674 679 """Return an object that can be used to incrementally compute the
675 680 revision numbers of the ancestors of arbitrary sets that are not
676 681 ancestors of common. This is an ancestor.incrementalmissingancestors
677 682 object.
678 683
679 684 'common' is a list of revision numbers. If common is not supplied, uses
680 685 nullrev.
681 686 """
682 687 if common is None:
683 688 common = [nullrev]
684 689
685 690 return ancestor.incrementalmissingancestors(self.parentrevs, common)
686 691
687 692 def findmissingrevs(self, common=None, heads=None):
688 693 """Return the revision numbers of the ancestors of heads that
689 694 are not ancestors of common.
690 695
691 696 More specifically, return a list of revision numbers corresponding to
692 697 nodes N such that every N satisfies the following constraints:
693 698
694 699 1. N is an ancestor of some node in 'heads'
695 700 2. N is not an ancestor of any node in 'common'
696 701
697 702 The list is sorted by revision number, meaning it is
698 703 topologically sorted.
699 704
700 705 'heads' and 'common' are both lists of revision numbers. If heads is
701 706 not supplied, uses all of the revlog's heads. If common is not
702 707 supplied, uses nullid."""
703 708 if common is None:
704 709 common = [nullrev]
705 710 if heads is None:
706 711 heads = self.headrevs()
707 712
708 713 inc = self.incrementalmissingrevs(common=common)
709 714 return inc.missingancestors(heads)
710 715
711 716 def findmissing(self, common=None, heads=None):
712 717 """Return the ancestors of heads that are not ancestors of common.
713 718
714 719 More specifically, return a list of nodes N such that every N
715 720 satisfies the following constraints:
716 721
717 722 1. N is an ancestor of some node in 'heads'
718 723 2. N is not an ancestor of any node in 'common'
719 724
720 725 The list is sorted by revision number, meaning it is
721 726 topologically sorted.
722 727
723 728 'heads' and 'common' are both lists of node IDs. If heads is
724 729 not supplied, uses all of the revlog's heads. If common is not
725 730 supplied, uses nullid."""
726 731 if common is None:
727 732 common = [nullid]
728 733 if heads is None:
729 734 heads = self.heads()
730 735
731 736 common = [self.rev(n) for n in common]
732 737 heads = [self.rev(n) for n in heads]
733 738
734 739 inc = self.incrementalmissingrevs(common=common)
735 740 return [self.node(r) for r in inc.missingancestors(heads)]
736 741
737 742 def nodesbetween(self, roots=None, heads=None):
738 743 """Return a topological path from 'roots' to 'heads'.
739 744
740 745 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
741 746 topologically sorted list of all nodes N that satisfy both of
742 747 these constraints:
743 748
744 749 1. N is a descendant of some node in 'roots'
745 750 2. N is an ancestor of some node in 'heads'
746 751
747 752 Every node is considered to be both a descendant and an ancestor
748 753 of itself, so every reachable node in 'roots' and 'heads' will be
749 754 included in 'nodes'.
750 755
751 756 'outroots' is the list of reachable nodes in 'roots', i.e., the
752 757 subset of 'roots' that is returned in 'nodes'. Likewise,
753 758 'outheads' is the subset of 'heads' that is also in 'nodes'.
754 759
755 760 'roots' and 'heads' are both lists of node IDs. If 'roots' is
756 761 unspecified, uses nullid as the only root. If 'heads' is
757 762 unspecified, uses list of all of the revlog's heads."""
758 763 nonodes = ([], [], [])
759 764 if roots is not None:
760 765 roots = list(roots)
761 766 if not roots:
762 767 return nonodes
763 768 lowestrev = min([self.rev(n) for n in roots])
764 769 else:
765 770 roots = [nullid] # Everybody's a descendant of nullid
766 771 lowestrev = nullrev
767 772 if (lowestrev == nullrev) and (heads is None):
768 773 # We want _all_ the nodes!
769 774 return ([self.node(r) for r in self], [nullid], list(self.heads()))
770 775 if heads is None:
771 776 # All nodes are ancestors, so the latest ancestor is the last
772 777 # node.
773 778 highestrev = len(self) - 1
774 779 # Set ancestors to None to signal that every node is an ancestor.
775 780 ancestors = None
776 781 # Set heads to an empty dictionary for later discovery of heads
777 782 heads = {}
778 783 else:
779 784 heads = list(heads)
780 785 if not heads:
781 786 return nonodes
782 787 ancestors = set()
783 788 # Turn heads into a dictionary so we can remove 'fake' heads.
784 789 # Also, later we will be using it to filter out the heads we can't
785 790 # find from roots.
786 791 heads = dict.fromkeys(heads, False)
787 792 # Start at the top and keep marking parents until we're done.
788 793 nodestotag = set(heads)
789 794 # Remember where the top was so we can use it as a limit later.
790 795 highestrev = max([self.rev(n) for n in nodestotag])
791 796 while nodestotag:
792 797 # grab a node to tag
793 798 n = nodestotag.pop()
794 799 # Never tag nullid
795 800 if n == nullid:
796 801 continue
797 802 # A node's revision number represents its place in a
798 803 # topologically sorted list of nodes.
799 804 r = self.rev(n)
800 805 if r >= lowestrev:
801 806 if n not in ancestors:
802 807 # If we are possibly a descendant of one of the roots
803 808 # and we haven't already been marked as an ancestor
804 809 ancestors.add(n) # Mark as ancestor
805 810 # Add non-nullid parents to list of nodes to tag.
806 811 nodestotag.update([p for p in self.parents(n) if
807 812 p != nullid])
808 813 elif n in heads: # We've seen it before, is it a fake head?
809 814 # So it is, real heads should not be the ancestors of
810 815 # any other heads.
811 816 heads.pop(n)
812 817 if not ancestors:
813 818 return nonodes
814 819 # Now that we have our set of ancestors, we want to remove any
815 820 # roots that are not ancestors.
816 821
817 822 # If one of the roots was nullid, everything is included anyway.
818 823 if lowestrev > nullrev:
819 824 # But, since we weren't, let's recompute the lowest rev to not
820 825 # include roots that aren't ancestors.
821 826
822 827 # Filter out roots that aren't ancestors of heads
823 828 roots = [root for root in roots if root in ancestors]
824 829 # Recompute the lowest revision
825 830 if roots:
826 831 lowestrev = min([self.rev(root) for root in roots])
827 832 else:
828 833 # No more roots? Return empty list
829 834 return nonodes
830 835 else:
831 836 # We are descending from nullid, and don't need to care about
832 837 # any other roots.
833 838 lowestrev = nullrev
834 839 roots = [nullid]
835 840 # Transform our roots list into a set.
836 841 descendants = set(roots)
837 842 # Also, keep the original roots so we can filter out roots that aren't
838 843 # 'real' roots (i.e. are descended from other roots).
839 844 roots = descendants.copy()
840 845 # Our topologically sorted list of output nodes.
841 846 orderedout = []
842 847 # Don't start at nullid since we don't want nullid in our output list,
843 848 # and if nullid shows up in descendants, empty parents will look like
844 849 # they're descendants.
845 850 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
846 851 n = self.node(r)
847 852 isdescendant = False
848 853 if lowestrev == nullrev: # Everybody is a descendant of nullid
849 854 isdescendant = True
850 855 elif n in descendants:
851 856 # n is already a descendant
852 857 isdescendant = True
853 858 # This check only needs to be done here because all the roots
854 859 # will start being marked as descendants before the loop.
855 860 if n in roots:
856 861 # If n was a root, check if it's a 'real' root.
857 862 p = tuple(self.parents(n))
858 863 # If any of its parents are descendants, it's not a root.
859 864 if (p[0] in descendants) or (p[1] in descendants):
860 865 roots.remove(n)
861 866 else:
862 867 p = tuple(self.parents(n))
863 868 # A node is a descendant if either of its parents is a
864 869 # descendant. (We seeded the descendants set with the roots
865 870 # up there, remember?)
866 871 if (p[0] in descendants) or (p[1] in descendants):
867 872 descendants.add(n)
868 873 isdescendant = True
869 874 if isdescendant and ((ancestors is None) or (n in ancestors)):
870 875 # Only include nodes that are both descendants and ancestors.
871 876 orderedout.append(n)
872 877 if (ancestors is not None) and (n in heads):
873 878 # We're trying to figure out which heads are reachable
874 879 # from roots.
875 880 # Mark this head as having been reached
876 881 heads[n] = True
877 882 elif ancestors is None:
878 883 # Otherwise, we're trying to discover the heads.
879 884 # Assume this is a head because if it isn't, the next step
880 885 # will eventually remove it.
881 886 heads[n] = True
882 887 # But, obviously its parents aren't.
883 888 for p in self.parents(n):
884 889 heads.pop(p, None)
885 890 heads = [head for head, flag in heads.iteritems() if flag]
886 891 roots = list(roots)
887 892 assert orderedout
888 893 assert roots
889 894 assert heads
890 895 return (orderedout, roots, heads)
891 896
892 897 def headrevs(self):
893 898 try:
894 899 return self.index.headrevs()
895 900 except AttributeError:
896 901 return self._headrevs()
897 902
898 903 def computephases(self, roots):
899 904 return self.index.computephasesmapsets(roots)
900 905
901 906 def _headrevs(self):
902 907 count = len(self)
903 908 if not count:
904 909 return [nullrev]
905 910 # we won't iter over filtered revs so nobody is a head at start
906 911 ishead = [0] * (count + 1)
907 912 index = self.index
908 913 for r in self:
909 914 ishead[r] = 1 # I may be a head
910 915 e = index[r]
911 916 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
912 917 return [r for r, val in enumerate(ishead) if val]
913 918
914 919 def heads(self, start=None, stop=None):
915 920 """return the list of all nodes that have no children
916 921
917 922 if start is specified, only heads that are descendants of
918 923 start will be returned
919 924 if stop is specified, it will consider all the revs from stop
920 925 as if they had no children
921 926 """
922 927 if start is None and stop is None:
923 928 if not len(self):
924 929 return [nullid]
925 930 return [self.node(r) for r in self.headrevs()]
926 931
927 932 if start is None:
928 933 start = nullid
929 934 if stop is None:
930 935 stop = []
931 936 stoprevs = set([self.rev(n) for n in stop])
932 937 startrev = self.rev(start)
933 938 reachable = {startrev}
934 939 heads = {startrev}
935 940
936 941 parentrevs = self.parentrevs
937 942 for r in self.revs(start=startrev + 1):
938 943 for p in parentrevs(r):
939 944 if p in reachable:
940 945 if r not in stoprevs:
941 946 reachable.add(r)
942 947 heads.add(r)
943 948 if p in heads and p not in stoprevs:
944 949 heads.remove(p)
945 950
946 951 return [self.node(r) for r in heads]
947 952
948 953 def children(self, node):
949 954 """find the children of a given node"""
950 955 c = []
951 956 p = self.rev(node)
952 957 for r in self.revs(start=p + 1):
953 958 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
954 959 if prevs:
955 960 for pr in prevs:
956 961 if pr == p:
957 962 c.append(self.node(r))
958 963 elif p == nullrev:
959 964 c.append(self.node(r))
960 965 return c
961 966
962 967 def descendant(self, start, end):
963 968 if start == nullrev:
964 969 return True
965 970 for i in self.descendants([start]):
966 971 if i == end:
967 972 return True
968 973 elif i > end:
969 974 break
970 975 return False
971 976
972 977 def commonancestorsheads(self, a, b):
973 978 """calculate all the heads of the common ancestors of nodes a and b"""
974 979 a, b = self.rev(a), self.rev(b)
975 980 try:
976 981 ancs = self.index.commonancestorsheads(a, b)
977 982 except (AttributeError, OverflowError): # C implementation failed
978 983 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
979 984 return pycompat.maplist(self.node, ancs)
980 985
981 986 def isancestor(self, a, b):
982 987 """return True if node a is an ancestor of node b
983 988
984 989 The implementation of this is trivial but the use of
985 990 commonancestorsheads is not."""
986 991 return a in self.commonancestorsheads(a, b)
987 992
988 993 def ancestor(self, a, b):
989 994 """calculate the "best" common ancestor of nodes a and b"""
990 995
991 996 a, b = self.rev(a), self.rev(b)
992 997 try:
993 998 ancs = self.index.ancestors(a, b)
994 999 except (AttributeError, OverflowError):
995 1000 ancs = ancestor.ancestors(self.parentrevs, a, b)
996 1001 if ancs:
997 1002 # choose a consistent winner when there's a tie
998 1003 return min(map(self.node, ancs))
999 1004 return nullid
1000 1005
1001 1006 def _match(self, id):
1002 1007 if isinstance(id, int):
1003 1008 # rev
1004 1009 return self.node(id)
1005 1010 if len(id) == 20:
1006 1011 # possibly a binary node
1007 1012 # odds of a binary node being all hex in ASCII are 1 in 10**25
1008 1013 try:
1009 1014 node = id
1010 1015 self.rev(node) # quick search the index
1011 1016 return node
1012 1017 except LookupError:
1013 1018 pass # may be partial hex id
1014 1019 try:
1015 1020 # str(rev)
1016 1021 rev = int(id)
1017 1022 if str(rev) != id:
1018 1023 raise ValueError
1019 1024 if rev < 0:
1020 1025 rev = len(self) + rev
1021 1026 if rev < 0 or rev >= len(self):
1022 1027 raise ValueError
1023 1028 return self.node(rev)
1024 1029 except (ValueError, OverflowError):
1025 1030 pass
1026 1031 if len(id) == 40:
1027 1032 try:
1028 1033 # a full hex nodeid?
1029 1034 node = bin(id)
1030 1035 self.rev(node)
1031 1036 return node
1032 1037 except (TypeError, LookupError):
1033 1038 pass
1034 1039
1035 1040 def _partialmatch(self, id):
1036 1041 try:
1037 1042 partial = self.index.partialmatch(id)
1038 1043 if partial and self.hasnode(partial):
1039 1044 return partial
1040 1045 return None
1041 1046 except RevlogError:
1042 1047 # parsers.c radix tree lookup gave multiple matches
1043 1048 # fast path: for unfiltered changelog, radix tree is accurate
1044 1049 if not getattr(self, 'filteredrevs', None):
1045 1050 raise LookupError(id, self.indexfile,
1046 1051 _('ambiguous identifier'))
1047 1052 # fall through to slow path that filters hidden revisions
1048 1053 except (AttributeError, ValueError):
1049 1054 # we are pure python, or key was too short to search radix tree
1050 1055 pass
1051 1056
1052 1057 if id in self._pcache:
1053 1058 return self._pcache[id]
1054 1059
1055 1060 if len(id) < 40:
1056 1061 try:
1057 1062 # hex(node)[:...]
1058 1063 l = len(id) // 2 # grab an even number of digits
1059 1064 prefix = bin(id[:l * 2])
1060 1065 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1061 1066 nl = [n for n in nl if hex(n).startswith(id) and
1062 1067 self.hasnode(n)]
1063 1068 if len(nl) > 0:
1064 1069 if len(nl) == 1:
1065 1070 self._pcache[id] = nl[0]
1066 1071 return nl[0]
1067 1072 raise LookupError(id, self.indexfile,
1068 1073 _('ambiguous identifier'))
1069 1074 return None
1070 1075 except TypeError:
1071 1076 pass
1072 1077
1073 1078 def lookup(self, id):
1074 1079 """locate a node based on:
1075 1080 - revision number or str(revision number)
1076 1081 - nodeid or subset of hex nodeid
1077 1082 """
1078 1083 n = self._match(id)
1079 1084 if n is not None:
1080 1085 return n
1081 1086 n = self._partialmatch(id)
1082 1087 if n:
1083 1088 return n
1084 1089
1085 1090 raise LookupError(id, self.indexfile, _('no match found'))
1086 1091
1087 1092 def cmp(self, node, text):
1088 1093 """compare text with a given file revision
1089 1094
1090 1095 returns True if text is different than what is stored.
1091 1096 """
1092 1097 p1, p2 = self.parents(node)
1093 1098 return hash(text, p1, p2) != node
1094 1099
1095 1100 def _cachesegment(self, offset, data):
1096 1101 """Add a segment to the revlog cache.
1097 1102
1098 1103 Accepts an absolute offset and the data that is at that location.
1099 1104 """
1100 1105 o, d = self._chunkcache
1101 1106 # try to add to existing cache
1102 1107 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1103 1108 self._chunkcache = o, d + data
1104 1109 else:
1105 1110 self._chunkcache = offset, data
1106 1111
1107 1112 def _readsegment(self, offset, length, df=None):
1108 1113 """Load a segment of raw data from the revlog.
1109 1114
1110 1115 Accepts an absolute offset, length to read, and an optional existing
1111 1116 file handle to read from.
1112 1117
1113 1118 If an existing file handle is passed, it will be seeked and the
1114 1119 original seek position will NOT be restored.
1115 1120
1116 1121 Returns a str or buffer of raw byte data.
1117 1122 """
1118 1123 if df is not None:
1119 1124 closehandle = False
1120 1125 else:
1121 1126 if self._inline:
1122 1127 df = self.opener(self.indexfile)
1123 1128 else:
1124 1129 df = self.opener(self.datafile)
1125 1130 closehandle = True
1126 1131
1127 1132 # Cache data both forward and backward around the requested
1128 1133 # data, in a fixed size window. This helps speed up operations
1129 1134 # involving reading the revlog backwards.
1130 1135 cachesize = self._chunkcachesize
1131 1136 realoffset = offset & ~(cachesize - 1)
1132 1137 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1133 1138 - realoffset)
1134 1139 df.seek(realoffset)
1135 1140 d = df.read(reallength)
1136 1141 if closehandle:
1137 1142 df.close()
1138 1143 self._cachesegment(realoffset, d)
1139 1144 if offset != realoffset or reallength != length:
1140 1145 return util.buffer(d, offset - realoffset, length)
1141 1146 return d
1142 1147
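# --- illustrative sketch (editor's note, not part of this changeset) -------
# The cache window is aligned to _chunkcachesize, which __init__ requires to
# be a power of two. With the default of 65536, a request for 100 bytes at
# offset 70000 works out to:
#
#   realoffset = 70000 & ~(65536 - 1)                                # 65536
#   reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - realoffset # 65536
#
# so the whole aligned 64k block surrounding the request is read and cached.
# ---------------------------------------------------------------------------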
1143 1148 def _getsegment(self, offset, length, df=None):
1144 1149 """Obtain a segment of raw data from the revlog.
1145 1150
1146 1151 Accepts an absolute offset, length of bytes to obtain, and an
1147 1152 optional file handle to the already-opened revlog. If the file
1148 1153 handle is used, its original seek position will not be preserved.
1149 1154
1150 1155 Requests for data may be returned from a cache.
1151 1156
1152 1157 Returns a str or a buffer instance of raw byte data.
1153 1158 """
1154 1159 o, d = self._chunkcache
1155 1160 l = len(d)
1156 1161
1157 1162 # is it in the cache?
1158 1163 cachestart = offset - o
1159 1164 cacheend = cachestart + length
1160 1165 if cachestart >= 0 and cacheend <= l:
1161 1166 if cachestart == 0 and cacheend == l:
1162 1167 return d # avoid a copy
1163 1168 return util.buffer(d, cachestart, cacheend - cachestart)
1164 1169
1165 1170 return self._readsegment(offset, length, df=df)
1166 1171
1167 1172 def _getsegmentforrevs(self, startrev, endrev, df=None):
1168 1173 """Obtain a segment of raw data corresponding to a range of revisions.
1169 1174
1170 1175 Accepts the start and end revisions and an optional already-open
1171 1176 file handle to be used for reading. If the file handle is read, its
1172 1177 seek position will not be preserved.
1173 1178
1174 1179 Requests for data may be satisfied by a cache.
1175 1180
1176 1181 Returns a 2-tuple of (offset, data) for the requested range of
1177 1182 revisions. Offset is the integer offset from the beginning of the
1178 1183 revlog and data is a str or buffer of the raw byte data.
1179 1184
1180 1185 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1181 1186 to determine where each revision's data begins and ends.
1182 1187 """
1183 1188 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1184 1189 # (functions are expensive).
1185 1190 index = self.index
1186 1191 istart = index[startrev]
1187 1192 start = int(istart[0] >> 16)
1188 1193 if startrev == endrev:
1189 1194 end = start + istart[1]
1190 1195 else:
1191 1196 iend = index[endrev]
1192 1197 end = int(iend[0] >> 16) + iend[1]
1193 1198
1194 1199 if self._inline:
1195 1200 start += (startrev + 1) * self._io.size
1196 1201 end += (endrev + 1) * self._io.size
1197 1202 length = end - start
1198 1203
1199 1204 return start, self._getsegment(start, length, df=df)
1200 1205
1201 1206 def _chunk(self, rev, df=None):
1202 1207 """Obtain a single decompressed chunk for a revision.
1203 1208
1204 1209 Accepts an integer revision and an optional already-open file handle
1205 1210 to be used for reading. If used, the seek position of the file will not
1206 1211 be preserved.
1207 1212
1208 1213 Returns a str holding uncompressed data for the requested revision.
1209 1214 """
1210 1215 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1211 1216
1212 1217 def _chunks(self, revs, df=None):
1213 1218 """Obtain decompressed chunks for the specified revisions.
1214 1219
1215 1220 Accepts an iterable of numeric revisions that are assumed to be in
1216 1221 ascending order. Also accepts an optional already-open file handle
1217 1222 to be used for reading. If used, the seek position of the file will
1218 1223 not be preserved.
1219 1224
1220 1225 This function is similar to calling ``self._chunk()`` multiple times,
1221 1226 but is faster.
1222 1227
1223 1228 Returns a list with decompressed data for each requested revision.
1224 1229 """
1225 1230 if not revs:
1226 1231 return []
1227 1232 start = self.start
1228 1233 length = self.length
1229 1234 inline = self._inline
1230 1235 iosize = self._io.size
1231 1236 buffer = util.buffer
1232 1237
1233 1238 l = []
1234 1239 ladd = l.append
1235 1240
1236 1241 try:
1237 1242 offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
1238 1243 except OverflowError:
1239 1244 # issue4215 - we can't cache a run of chunks greater than
1240 1245 # 2G on Windows
1241 1246 return [self._chunk(rev, df=df) for rev in revs]
1242 1247
1243 1248 decomp = self.decompress
1244 1249 for rev in revs:
1245 1250 chunkstart = start(rev)
1246 1251 if inline:
1247 1252 chunkstart += (rev + 1) * iosize
1248 1253 chunklength = length(rev)
1249 1254 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1250 1255
1251 1256 return l
1252 1257
1253 1258 def _chunkclear(self):
1254 1259 """Clear the raw chunk cache."""
1255 1260 self._chunkcache = (0, '')
1256 1261
1257 1262 def deltaparent(self, rev):
1258 1263 """return deltaparent of the given revision"""
1259 1264 base = self.index[rev][3]
1260 1265 if base == rev:
1261 1266 return nullrev
1262 1267 elif self._generaldelta:
1263 1268 return base
1264 1269 else:
1265 1270 return rev - 1
1266 1271
1267 1272 def revdiff(self, rev1, rev2):
1268 1273 """return or calculate a delta between two revisions
1269 1274
1270 1275 The delta calculated is in binary form and is intended to be written to
1271 1276 revlog data directly. So this function needs raw revision data.
1272 1277 """
1273 1278 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1274 1279 return bytes(self._chunk(rev2))
1275 1280
1276 1281 return mdiff.textdiff(self.revision(rev1, raw=True),
1277 1282 self.revision(rev2, raw=True))
1278 1283
1279 1284 def revision(self, nodeorrev, _df=None, raw=False):
1280 1285 """return an uncompressed revision of a given node or revision
1281 1286 number.
1282 1287
1283 1288 _df - an existing file handle to read from. (internal-only)
1284 1289 raw - an optional argument specifying if the revision data is to be
1285 1290 treated as raw data when applying flag transforms. 'raw' should be set
1286 1291 to True when generating changegroups or in debug commands.
1287 1292 """
1288 1293 if isinstance(nodeorrev, int):
1289 1294 rev = nodeorrev
1290 1295 node = self.node(rev)
1291 1296 else:
1292 1297 node = nodeorrev
1293 1298 rev = None
1294 1299
1295 1300 cachedrev = None
1296 1301 flags = None
1297 1302 rawtext = None
1298 1303 if node == nullid:
1299 1304 return ""
1300 1305 if self._cache:
1301 1306 if self._cache[0] == node:
1302 1307 # _cache only stores rawtext
1303 1308 if raw:
1304 1309 return self._cache[2]
1305 1310 # duplicated, but good for perf
1306 1311 if rev is None:
1307 1312 rev = self.rev(node)
1308 1313 if flags is None:
1309 1314 flags = self.flags(rev)
1310 1315 # no extra flags set, no flag processor runs, text = rawtext
1311 1316 if flags == REVIDX_DEFAULT_FLAGS:
1312 1317 return self._cache[2]
1313 1318 # rawtext is reusable. need to run flag processor
1314 1319 rawtext = self._cache[2]
1315 1320
1316 1321 cachedrev = self._cache[1]
1317 1322
1318 1323 # look up what we need to read
1319 1324 if rawtext is None:
1320 1325 if rev is None:
1321 1326 rev = self.rev(node)
1322 1327
1323 1328 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1324 1329 if stopped:
1325 1330 rawtext = self._cache[2]
1326 1331
1327 1332 # drop cache to save memory
1328 1333 self._cache = None
1329 1334
1330 1335 bins = self._chunks(chain, df=_df)
1331 1336 if rawtext is None:
1332 1337 rawtext = bytes(bins[0])
1333 1338 bins = bins[1:]
1334 1339
1335 1340 rawtext = mdiff.patches(rawtext, bins)
1336 1341 self._cache = (node, rev, rawtext)
1337 1342
1338 1343 if flags is None:
1339 1344 if rev is None:
1340 1345 rev = self.rev(node)
1341 1346 flags = self.flags(rev)
1342 1347
1343 1348 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1344 1349 if validatehash:
1345 1350 self.checkhash(text, node, rev=rev)
1346 1351
1347 1352 return text
1348 1353
1349 1354 def hash(self, text, p1, p2):
1350 1355 """Compute a node hash.
1351 1356
1352 1357 Available as a function so that subclasses can replace the hash
1353 1358 as needed.
1354 1359 """
1355 1360 return hash(text, p1, p2)
1356 1361
1357 1362 def _processflags(self, text, flags, operation, raw=False):
1358 1363 """Inspect revision data flags and applies transforms defined by
1359 1364 registered flag processors.
1360 1365
1361 1366 ``text`` - the revision data to process
1362 1367 ``flags`` - the revision flags
1363 1368 ``operation`` - the operation being performed (read or write)
1364 1369 ``raw`` - an optional argument describing if the raw transform should be
1365 1370 applied.
1366 1371
1367 1372 This method processes the flags in the order (or reverse order if
1368 1373 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1369 1374 flag processors registered for present flags. The order of flags defined
1370 1375 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1371 1376
1372 1377 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1373 1378 processed text and ``validatehash`` is a bool indicating whether the
1374 1379 returned text should be checked for hash integrity.
1375 1380
1376 1381 Note: If the ``raw`` argument is set, it has precedence over the
1377 1382 operation and will only update the value of ``validatehash``.
1378 1383 """
1379 1384 # fast path: no flag processors will run
1380 1385 if flags == 0:
1381 1386 return text, True
1382 1387 if not operation in ('read', 'write'):
1383 1388 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1384 1389 # Check all flags are known.
1385 1390 if flags & ~REVIDX_KNOWN_FLAGS:
1386 1391 raise RevlogError(_("incompatible revision flag '%#x'") %
1387 1392 (flags & ~REVIDX_KNOWN_FLAGS))
1388 1393 validatehash = True
1389 1394 # Depending on the operation (read or write), the order might be
1390 1395 # reversed due to non-commutative transforms.
1391 1396 orderedflags = REVIDX_FLAGS_ORDER
1392 1397 if operation == 'write':
1393 1398 orderedflags = reversed(orderedflags)
1394 1399
1395 1400 for flag in orderedflags:
1396 1401 # If a flagprocessor has been registered for a known flag, apply the
1397 1402 # related operation transform and update result tuple.
1398 1403 if flag & flags:
1399 1404 vhash = True
1400 1405
1401 1406 if flag not in _flagprocessors:
1402 1407 message = _("missing processor for flag '%#x'") % (flag)
1403 1408 raise RevlogError(message)
1404 1409
1405 1410 processor = _flagprocessors[flag]
1406 1411 if processor is not None:
1407 1412 readtransform, writetransform, rawtransform = processor
1408 1413
1409 1414 if raw:
1410 1415 vhash = rawtransform(self, text)
1411 1416 elif operation == 'read':
1412 1417 text, vhash = readtransform(self, text)
1413 1418 else: # write operation
1414 1419 text, vhash = writetransform(self, text)
1415 1420 validatehash = validatehash and vhash
1416 1421
1417 1422 return text, validatehash
1418 1423
1419 1424 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1420 1425 """Check node hash integrity.
1421 1426
1422 1427 Available as a function so that subclasses can extend hash mismatch
1423 1428 behaviors as needed.
1424 1429 """
1425 1430 if p1 is None and p2 is None:
1426 1431 p1, p2 = self.parents(node)
1427 1432 if node != self.hash(text, p1, p2):
1428 1433 revornode = rev
1429 1434 if revornode is None:
1430 1435 revornode = templatefilters.short(hex(node))
1431 1436 raise RevlogError(_("integrity check failed on %s:%s")
1432 1437 % (self.indexfile, revornode))
1433 1438
1434 1439 def checkinlinesize(self, tr, fp=None):
1435 1440 """Check if the revlog is too big for inline and convert if so.
1436 1441
1437 1442 This should be called after revisions are added to the revlog. If the
1438 1443 revlog has grown too large to be an inline revlog, it will convert it
1439 1444 to use multiple index and data files.
1440 1445 """
1441 1446 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1442 1447 return
1443 1448
1444 1449 trinfo = tr.find(self.indexfile)
1445 1450 if trinfo is None:
1446 1451 raise RevlogError(_("%s not found in the transaction")
1447 1452 % self.indexfile)
1448 1453
1449 1454 trindex = trinfo[2]
1450 1455 if trindex is not None:
1451 1456 dataoff = self.start(trindex)
1452 1457 else:
1453 1458 # revlog was stripped at start of transaction, use all leftover data
1454 1459 trindex = len(self) - 1
1455 1460 dataoff = self.end(-2)
1456 1461
1457 1462 tr.add(self.datafile, dataoff)
1458 1463
1459 1464 if fp:
1460 1465 fp.flush()
1461 1466 fp.close()
1462 1467
1463 1468 df = self.opener(self.datafile, 'w')
1464 1469 try:
1465 1470 for r in self:
1466 1471 df.write(self._getsegmentforrevs(r, r)[1])
1467 1472 finally:
1468 1473 df.close()
1469 1474
1470 1475 fp = self.opener(self.indexfile, 'w', atomictemp=True,
1471 1476 checkambig=self._checkambig)
1472 1477 self.version &= ~FLAG_INLINE_DATA
1473 1478 self._inline = False
1474 1479 for i in self:
1475 1480 e = self._io.packentry(self.index[i], self.node, self.version, i)
1476 1481 fp.write(e)
1477 1482
1478 1483 # if we don't call close, the temp file will never replace the
1479 1484 # real index
1480 1485 fp.close()
1481 1486
1482 1487 tr.replace(self.indexfile, trindex * self._io.size)
1483 1488 self._chunkclear()
1484 1489
1485 1490 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1486 1491 node=None, flags=REVIDX_DEFAULT_FLAGS):
1487 1492 """add a revision to the log
1488 1493
1489 1494 text - the revision data to add
1490 1495 transaction - the transaction object used for rollback
1491 1496 link - the linkrev data to add
1492 1497 p1, p2 - the parent nodeids of the revision
1493 1498 cachedelta - an optional precomputed delta
1494 1499 node - nodeid of revision; typically node is not specified, and it is
1495 1500 computed by default as hash(text, p1, p2), however subclasses might
1496 1501 use different hashing method (and override checkhash() in such case)
1497 1502 flags - the known flags to set on the revision
1498 1503 """
1499 1504 if link == nullrev:
1500 1505 raise RevlogError(_("attempted to add linkrev -1 to %s")
1501 1506 % self.indexfile)
1502 1507
1503 1508 if flags:
1504 1509 node = node or self.hash(text, p1, p2)
1505 1510
1506 1511 rawtext, validatehash = self._processflags(text, flags, 'write')
1507 1512
1508 1513 # If the flag processor modifies the revision data, ignore any provided
1509 1514 # cachedelta.
1510 1515 if rawtext != text:
1511 1516 cachedelta = None
1512 1517
1513 1518 if len(rawtext) > _maxentrysize:
1514 1519 raise RevlogError(
1515 1520 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1516 1521 % (self.indexfile, len(rawtext)))
1517 1522
1518 1523 node = node or self.hash(rawtext, p1, p2)
1519 1524 if node in self.nodemap:
1520 1525 return node
1521 1526
1522 1527 if validatehash:
1523 1528 self.checkhash(rawtext, node, p1=p1, p2=p2)
1524 1529
1525 1530 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1526 1531 flags, cachedelta=cachedelta)
1527 1532
1528 1533 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1529 1534 cachedelta=None):
1530 1535 """add a raw revision with known flags, node and parents
1531 1536 useful when reusing a revision not stored in this revlog (ex: received
1532 1537 over wire, or read from an external bundle).
1533 1538 """
1534 1539 dfh = None
1535 1540 if not self._inline:
1536 1541 dfh = self.opener(self.datafile, "a+")
1537 1542 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1538 1543 try:
1539 1544 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1540 1545 flags, cachedelta, ifh, dfh)
1541 1546 finally:
1542 1547 if dfh:
1543 1548 dfh.close()
1544 1549 ifh.close()
1545 1550
1546 1551 def compress(self, data):
1547 1552 """Generate a possibly-compressed representation of data."""
1548 1553 if not data:
1549 1554 return '', data
1550 1555
1551 1556 compressed = self._compressor.compress(data)
1552 1557
1553 1558 if compressed:
1554 1559 # The revlog compressor added the header in the returned data.
1555 1560 return '', compressed
1556 1561
1557 1562 if data[0:1] == '\0':
1558 1563 return '', data
1559 1564 return 'u', data
1560 1565
1561 1566 def decompress(self, data):
1562 1567 """Decompress a revlog chunk.
1563 1568
1564 1569 The chunk is expected to begin with a header identifying the
1565 1570 format type so it can be routed to an appropriate decompressor.
1566 1571 """
1567 1572 if not data:
1568 1573 return data
1569 1574
1570 1575 # Revlogs are read much more frequently than they are written and many
1571 1576 # chunks only take microseconds to decompress, so performance is
1572 1577 # important here.
1573 1578 #
1574 1579 # We can make a few assumptions about revlogs:
1575 1580 #
1576 1581 # 1) the majority of chunks will be compressed (as opposed to inline
1577 1582 # raw data).
1578 1583         # 2) decompressing *any* data will likely be at least 10x slower than
1579 1584 # returning raw inline data.
1580 1585 # 3) we want to prioritize common and officially supported compression
1581 1586 # engines
1582 1587 #
1583 1588 # It follows that we want to optimize for "decompress compressed data
1584 1589 # when encoded with common and officially supported compression engines"
1585 1590 # case over "raw data" and "data encoded by less common or non-official
1586 1591 # compression engines." That is why we have the inline lookup first
1587 1592 # followed by the compengines lookup.
1588 1593 #
1589 1594 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1590 1595 # compressed chunks. And this matters for changelog and manifest reads.
1591 1596 t = data[0:1]
1592 1597
1593 1598 if t == 'x':
1594 1599 try:
1595 1600 return _zlibdecompress(data)
1596 1601 except zlib.error as e:
1597 1602 raise RevlogError(_('revlog decompress error: %s') % str(e))
1598 1603 # '\0' is more common than 'u' so it goes first.
1599 1604 elif t == '\0':
1600 1605 return data
1601 1606 elif t == 'u':
1602 1607 return util.buffer(data, 1)
1603 1608
1604 1609 try:
1605 1610 compressor = self._decompressors[t]
1606 1611 except KeyError:
1607 1612 try:
1608 1613 engine = util.compengines.forrevlogheader(t)
1609 1614 compressor = engine.revlogcompressor()
1610 1615 self._decompressors[t] = compressor
1611 1616 except KeyError:
1612 1617 raise RevlogError(_('unknown compression type %r') % t)
1613 1618
1614 1619 return compressor.decompress(data)
1615 1620
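A minimal sketch of the header-byte dispatch that compress() and decompress() agree on, assuming plain zlib data; the pluggable compression-engine path is left out for brevity:

    import zlib

    def classify_and_decompress(chunk):
        t = chunk[0:1]
        if t == 'x':            # zlib stream, the common case
            return zlib.decompress(chunk)
        elif t == '\0':         # raw data that already starts with NUL
            return chunk
        elif t == 'u':          # explicitly marked "stored uncompressed"
            return chunk[1:]
        raise ValueError('unknown compression type %r' % t)
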
1616 1621 def _isgooddelta(self, d, textlen):
1617 1622 """Returns True if the given delta is good. Good means that it is within
1618 1623 the disk span, disk size, and chain length bounds that we know to be
1619 1624 performant."""
1620 1625 if d is None:
1621 1626 return False
1622 1627
1623 1628 # - 'dist' is the distance from the base revision -- bounding it limits
1624 1629 # the amount of I/O we need to do.
1625 1630 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1626 1631 # to apply -- bounding it limits the amount of CPU we consume.
1627 1632 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1628 1633 if (dist > textlen * 4 or l > textlen or
1629 1634 compresseddeltalen > textlen * 2 or
1630 1635 (self._maxchainlen and chainlen > self._maxchainlen)):
1631 1636 return False
1632 1637
1633 1638 return True
1634 1639
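With hypothetical figures for a 1000-byte revision, the same bounds can be checked by hand (the optional maxchainlen check is omitted):

    textlen = 1000
    dist, deltalen, compresseddeltalen = 3900, 200, 1800
    good = (dist <= textlen * 4 and             # bounded read span (I/O)
            deltalen <= textlen and             # delta smaller than the text
            compresseddeltalen <= textlen * 2)  # bounded chain cost (CPU)
    # good is True here; raising dist above 4000 would disqualify the delta.
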
1635 1640 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1636 1641 cachedelta, ifh, dfh, alwayscache=False):
1637 1642 """internal function to add revisions to the log
1638 1643
1639 1644 see addrevision for argument descriptions.
1640 1645
1641 1646 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1642 1647
1643 1648 invariants:
1644 1649 - rawtext is optional (can be None); if not set, cachedelta must be set.
1645 1650 if both are set, they must correspond to each other.
1646 1651 """
1647 1652 btext = [rawtext]
1648 1653 def buildtext():
1649 1654 if btext[0] is not None:
1650 1655 return btext[0]
1651 1656 baserev = cachedelta[0]
1652 1657 delta = cachedelta[1]
1653 1658 # special case deltas which replace entire base; no need to decode
1654 1659 # base revision. this neatly avoids censored bases, which throw when
1655 1660 # they're decoded.
1656 1661 hlen = struct.calcsize(">lll")
1657 1662 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1658 1663 len(delta) - hlen):
1659 1664 btext[0] = delta[hlen:]
1660 1665 else:
1661 1666 if self._inline:
1662 1667 fh = ifh
1663 1668 else:
1664 1669 fh = dfh
1665 1670 basetext = self.revision(baserev, _df=fh, raw=True)
1666 1671 btext[0] = mdiff.patch(basetext, delta)
1667 1672
1668 1673 try:
1669 1674 res = self._processflags(btext[0], flags, 'read', raw=True)
1670 1675 btext[0], validatehash = res
1671 1676 if validatehash:
1672 1677 self.checkhash(btext[0], node, p1=p1, p2=p2)
1673 1678 if flags & REVIDX_ISCENSORED:
1674 1679 raise RevlogError(_('node %s is not censored') % node)
1675 1680 except CensoredNodeError:
1676 1681 # must pass the censored index flag to add censored revisions
1677 1682 if not flags & REVIDX_ISCENSORED:
1678 1683 raise
1679 1684 return btext[0]
1680 1685
1681 1686 def builddelta(rev):
1682 1687 # can we use the cached delta?
1683 1688 if cachedelta and cachedelta[0] == rev:
1684 1689 delta = cachedelta[1]
1685 1690 else:
1686 1691 t = buildtext()
1687 1692 if self.iscensored(rev):
1688 1693 # deltas based on a censored revision must replace the
1689 1694 # full content in one patch, so delta works everywhere
1690 1695 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1691 1696 delta = header + t
1692 1697 else:
1693 1698 if self._inline:
1694 1699 fh = ifh
1695 1700 else:
1696 1701 fh = dfh
1697 1702 ptext = self.revision(rev, _df=fh, raw=True)
1698 1703 delta = mdiff.textdiff(ptext, t)
1699 1704 header, data = self.compress(delta)
1700 1705 deltalen = len(header) + len(data)
1701 1706 chainbase = self.chainbase(rev)
1702 1707 dist = deltalen + offset - self.start(chainbase)
1703 1708 if self._generaldelta:
1704 1709 base = rev
1705 1710 else:
1706 1711 base = chainbase
1707 1712 chainlen, compresseddeltalen = self._chaininfo(rev)
1708 1713 chainlen += 1
1709 1714 compresseddeltalen += deltalen
1710 1715 return (dist, deltalen, (header, data), base,
1711 1716 chainbase, chainlen, compresseddeltalen)
1712 1717
1713 1718 curr = len(self)
1714 1719 prev = curr - 1
1715 1720 offset = self.end(prev)
1716 1721 delta = None
1717 1722 p1r, p2r = self.rev(p1), self.rev(p2)
1718 1723
1719 1724 # full versions are inserted when the needed deltas
1720 1725 # become comparable to the uncompressed text
1721 1726 if rawtext is None:
1722 1727 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1723 1728 cachedelta[1])
1724 1729 else:
1725 1730 textlen = len(rawtext)
1726 1731
1727 1732 # should we try to build a delta?
1728 1733 if prev != nullrev and self.storedeltachains:
1729 1734 tested = set()
1730 1735 # This condition is true most of the time when processing
1731 1736 # changegroup data into a generaldelta repo. The only time it
1732 1737 # isn't true is if this is the first revision in a delta chain
1733 1738 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
1734 1739 if cachedelta and self._generaldelta and self._lazydeltabase:
1735 1740 # Assume what we received from the server is a good choice
1736 1741 # build delta will reuse the cache
1737 1742 candidatedelta = builddelta(cachedelta[0])
1738 1743 tested.add(cachedelta[0])
1739 1744 if self._isgooddelta(candidatedelta, textlen):
1740 1745 delta = candidatedelta
1741 1746 if delta is None and self._generaldelta:
1742 1747 # exclude already lazy tested base if any
1743 1748 parents = [p for p in (p1r, p2r)
1744 1749 if p != nullrev and p not in tested]
1745 1750 if parents and not self._aggressivemergedeltas:
1746 1751 # Pick whichever parent is closer to us (to minimize the
1747 1752 # chance of having to build a fulltext).
1748 1753 parents = [max(parents)]
1749 1754 tested.update(parents)
1750 1755 pdeltas = []
1751 1756 for p in parents:
1752 1757 pd = builddelta(p)
1753 1758 if self._isgooddelta(pd, textlen):
1754 1759 pdeltas.append(pd)
1755 1760 if pdeltas:
1756 1761 delta = min(pdeltas, key=lambda x: x[1])
1757 1762 if delta is None and prev not in tested:
1758 1763             # other approaches failed; try against prev to hopefully save us a
1759 1764 # fulltext.
1760 1765 candidatedelta = builddelta(prev)
1761 1766 if self._isgooddelta(candidatedelta, textlen):
1762 1767 delta = candidatedelta
1763 1768 if delta is not None:
1764 1769 dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
1765 1770 else:
1766 1771 rawtext = buildtext()
1767 1772 data = self.compress(rawtext)
1768 1773 l = len(data[1]) + len(data[0])
1769 1774 base = chainbase = curr
1770 1775
1771 1776 e = (offset_type(offset, flags), l, textlen,
1772 1777 base, link, p1r, p2r, node)
1773 1778 self.index.insert(-1, e)
1774 1779 self.nodemap[node] = curr
1775 1780
1776 1781 entry = self._io.packentry(e, self.node, self.version, curr)
1777 1782 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1778 1783
1779 1784 if alwayscache and rawtext is None:
1780 1785 rawtext = buildtext()
1781 1786
1782 1787 if type(rawtext) == str: # only accept immutable objects
1783 1788 self._cache = (node, curr, rawtext)
1784 1789 self._chainbasecache[curr] = chainbase
1785 1790 return node
1786 1791
1787 1792 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1788 1793 # Files opened in a+ mode have inconsistent behavior on various
1789 1794 # platforms. Windows requires that a file positioning call be made
1790 1795 # when the file handle transitions between reads and writes. See
1791 1796 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1792 1797 # platforms, Python or the platform itself can be buggy. Some versions
1793 1798 # of Solaris have been observed to not append at the end of the file
1794 1799 # if the file was seeked to before the end. See issue4943 for more.
1795 1800 #
1796 1801 # We work around this issue by inserting a seek() before writing.
1797 1802 # Note: This is likely not necessary on Python 3.
1798 1803 ifh.seek(0, os.SEEK_END)
1799 1804 if dfh:
1800 1805 dfh.seek(0, os.SEEK_END)
1801 1806
1802 1807 curr = len(self) - 1
1803 1808 if not self._inline:
1804 1809 transaction.add(self.datafile, offset)
1805 1810 transaction.add(self.indexfile, curr * len(entry))
1806 1811 if data[0]:
1807 1812 dfh.write(data[0])
1808 1813 dfh.write(data[1])
1809 1814 ifh.write(entry)
1810 1815 else:
1811 1816 offset += curr * self._io.size
1812 1817 transaction.add(self.indexfile, offset, curr)
1813 1818 ifh.write(entry)
1814 1819 ifh.write(data[0])
1815 1820 ifh.write(data[1])
1816 1821 self.checkinlinesize(transaction, ifh)
1817 1822
1818 1823 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1819 1824 """
1820 1825 add a delta group
1821 1826
1822 1827 given a set of deltas, add them to the revision log. the
1823 1828 first delta is against its parent, which should be in our
1824 1829 log, the rest are against the previous delta.
1825 1830
1826 1831 If ``addrevisioncb`` is defined, it will be called with arguments of
1827 1832 this revlog and the node that was added.
1828 1833 """
1829 1834
1830 1835 # track the base of the current delta log
1831 1836 content = []
1832 1837 node = None
1833 1838
1834 1839 r = len(self)
1835 1840 end = 0
1836 1841 if r:
1837 1842 end = self.end(r - 1)
1838 1843 ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
1839 1844 isize = r * self._io.size
1840 1845 if self._inline:
1841 1846 transaction.add(self.indexfile, end + isize, r)
1842 1847 dfh = None
1843 1848 else:
1844 1849 transaction.add(self.indexfile, isize, r)
1845 1850 transaction.add(self.datafile, end)
1846 1851 dfh = self.opener(self.datafile, "a+")
1847 1852 def flush():
1848 1853 if dfh:
1849 1854 dfh.flush()
1850 1855 ifh.flush()
1851 1856 try:
1852 1857 # loop through our set of deltas
1853 1858 chain = None
1854 1859 for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
1855 1860 node = chunkdata['node']
1856 1861 p1 = chunkdata['p1']
1857 1862 p2 = chunkdata['p2']
1858 1863 cs = chunkdata['cs']
1859 1864 deltabase = chunkdata['deltabase']
1860 1865 delta = chunkdata['delta']
1861 1866 flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
1862 1867
1863 1868 content.append(node)
1864 1869
1865 1870 link = linkmapper(cs)
1866 1871 if node in self.nodemap:
1867 1872 # this can happen if two branches make the same change
1868 1873 chain = node
1869 1874 continue
1870 1875
1871 1876 for p in (p1, p2):
1872 1877 if p not in self.nodemap:
1873 1878 raise LookupError(p, self.indexfile,
1874 1879 _('unknown parent'))
1875 1880
1876 1881 if deltabase not in self.nodemap:
1877 1882 raise LookupError(deltabase, self.indexfile,
1878 1883 _('unknown delta base'))
1879 1884
1880 1885 baserev = self.rev(deltabase)
1881 1886
1882 1887 if baserev != nullrev and self.iscensored(baserev):
1883 1888 # if base is censored, delta must be full replacement in a
1884 1889 # single patch operation
1885 1890 hlen = struct.calcsize(">lll")
1886 1891 oldlen = self.rawsize(baserev)
1887 1892 newlen = len(delta) - hlen
1888 1893 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1889 1894 raise error.CensoredBaseError(self.indexfile,
1890 1895 self.node(baserev))
1891 1896
1892 1897 if not flags and self._peek_iscensored(baserev, delta, flush):
1893 1898 flags |= REVIDX_ISCENSORED
1894 1899
1895 1900 # We assume consumers of addrevisioncb will want to retrieve
1896 1901 # the added revision, which will require a call to
1897 1902 # revision(). revision() will fast path if there is a cache
1898 1903 # hit. So, we tell _addrevision() to always cache in this case.
1899 1904 # We're only using addgroup() in the context of changegroup
1900 1905 # generation so the revision data can always be handled as raw
1901 1906 # by the flagprocessor.
1902 1907 chain = self._addrevision(node, None, transaction, link,
1903 1908 p1, p2, flags, (baserev, delta),
1904 1909 ifh, dfh,
1905 1910 alwayscache=bool(addrevisioncb))
1906 1911
1907 1912 if addrevisioncb:
1908 1913 addrevisioncb(self, chain)
1909 1914
1910 1915 if not dfh and not self._inline:
1911 1916 # addrevision switched from inline to conventional
1912 1917 # reopen the index
1913 1918 ifh.close()
1914 1919 dfh = self.opener(self.datafile, "a+")
1915 1920 ifh = self.opener(self.indexfile, "a+",
1916 1921 checkambig=self._checkambig)
1917 1922 finally:
1918 1923 if dfh:
1919 1924 dfh.close()
1920 1925 ifh.close()
1921 1926
1922 1927 return content
1923 1928
1924 1929 def iscensored(self, rev):
1925 1930 """Check if a file revision is censored."""
1926 1931 return False
1927 1932
1928 1933 def _peek_iscensored(self, baserev, delta, flush):
1929 1934 """Quickly check if a delta produces a censored revision."""
1930 1935 return False
1931 1936
1932 1937 def getstrippoint(self, minlink):
1933 1938 """find the minimum rev that must be stripped to strip the linkrev
1934 1939
1935 1940 Returns a tuple containing the minimum rev and a set of all revs that
1936 1941 have linkrevs that will be broken by this strip.
1937 1942 """
1938 1943 brokenrevs = set()
1939 1944 strippoint = len(self)
1940 1945
1941 1946 heads = {}
1942 1947 futurelargelinkrevs = set()
1943 1948 for head in self.headrevs():
1944 1949 headlinkrev = self.linkrev(head)
1945 1950 heads[head] = headlinkrev
1946 1951 if headlinkrev >= minlink:
1947 1952 futurelargelinkrevs.add(headlinkrev)
1948 1953
1949 1954 # This algorithm involves walking down the rev graph, starting at the
1950 1955 # heads. Since the revs are topologically sorted according to linkrev,
1951 1956 # once all head linkrevs are below the minlink, we know there are
1952 1957 # no more revs that could have a linkrev greater than minlink.
1953 1958 # So we can stop walking.
1954 1959 while futurelargelinkrevs:
1955 1960 strippoint -= 1
1956 1961 linkrev = heads.pop(strippoint)
1957 1962
1958 1963 if linkrev < minlink:
1959 1964 brokenrevs.add(strippoint)
1960 1965 else:
1961 1966 futurelargelinkrevs.remove(linkrev)
1962 1967
1963 1968 for p in self.parentrevs(strippoint):
1964 1969 if p != nullrev:
1965 1970 plinkrev = self.linkrev(p)
1966 1971 heads[p] = plinkrev
1967 1972 if plinkrev >= minlink:
1968 1973 futurelargelinkrevs.add(plinkrev)
1969 1974
1970 1975 return strippoint, brokenrevs
1971 1976
1972 1977 def strip(self, minlink, transaction):
1973 1978 """truncate the revlog on the first revision with a linkrev >= minlink
1974 1979
1975 1980 This function is called when we're stripping revision minlink and
1976 1981 its descendants from the repository.
1977 1982
1978 1983 We have to remove all revisions with linkrev >= minlink, because
1979 1984 the equivalent changelog revisions will be renumbered after the
1980 1985 strip.
1981 1986
1982 1987 So we truncate the revlog on the first of these revisions, and
1983 1988 trust that the caller has saved the revisions that shouldn't be
1984 1989 removed and that it'll re-add them after this truncation.
1985 1990 """
1986 1991 if len(self) == 0:
1987 1992 return
1988 1993
1989 1994 rev, _ = self.getstrippoint(minlink)
1990 1995 if rev == len(self):
1991 1996 return
1992 1997
1993 1998 # first truncate the files on disk
1994 1999 end = self.start(rev)
1995 2000 if not self._inline:
1996 2001 transaction.add(self.datafile, end)
1997 2002 end = rev * self._io.size
1998 2003 else:
1999 2004 end += rev * self._io.size
2000 2005
2001 2006 transaction.add(self.indexfile, end)
2002 2007
2003 2008 # then reset internal state in memory to forget those revisions
2004 2009 self._cache = None
2005 2010 self._chaininfocache = {}
2006 2011 self._chunkclear()
2007 2012 for x in xrange(rev, len(self)):
2008 2013 del self.nodemap[self.node(x)]
2009 2014
2010 2015 del self.index[rev:-1]
2011 2016
2012 2017 def checksize(self):
2013 2018 expected = 0
2014 2019 if len(self):
2015 2020 expected = max(0, self.end(len(self) - 1))
2016 2021
2017 2022 try:
2018 2023 f = self.opener(self.datafile)
2019 2024 f.seek(0, 2)
2020 2025 actual = f.tell()
2021 2026 f.close()
2022 2027 dd = actual - expected
2023 2028 except IOError as inst:
2024 2029 if inst.errno != errno.ENOENT:
2025 2030 raise
2026 2031 dd = 0
2027 2032
2028 2033 try:
2029 2034 f = self.opener(self.indexfile)
2030 2035 f.seek(0, 2)
2031 2036 actual = f.tell()
2032 2037 f.close()
2033 2038 s = self._io.size
2034 2039 i = max(0, actual // s)
2035 2040 di = actual - (i * s)
2036 2041 if self._inline:
2037 2042 databytes = 0
2038 2043 for r in self:
2039 2044 databytes += max(0, self.length(r))
2040 2045 dd = 0
2041 2046 di = actual - len(self) * s - databytes
2042 2047 except IOError as inst:
2043 2048 if inst.errno != errno.ENOENT:
2044 2049 raise
2045 2050 di = 0
2046 2051
2047 2052 return (dd, di)
2048 2053
2049 2054 def files(self):
2050 2055 res = [self.indexfile]
2051 2056 if not self._inline:
2052 2057 res.append(self.datafile)
2053 2058 return res
2054 2059
2055 2060 DELTAREUSEALWAYS = 'always'
2056 2061 DELTAREUSESAMEREVS = 'samerevs'
2057 2062 DELTAREUSENEVER = 'never'
2058 2063
2059 2064 DELTAREUSEALL = {'always', 'samerevs', 'never'}
2060 2065
2061 2066 def clone(self, tr, destrevlog, addrevisioncb=None,
2062 2067 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2063 2068 """Copy this revlog to another, possibly with format changes.
2064 2069
2065 2070 The destination revlog will contain the same revisions and nodes.
2066 2071 However, it may not be bit-for-bit identical due to e.g. delta encoding
2067 2072 differences.
2068 2073
2069 2074         The ``deltareuse`` argument controls how deltas from the existing revlog
2070 2075 are preserved in the destination revlog. The argument can have the
2071 2076 following values:
2072 2077
2073 2078 DELTAREUSEALWAYS
2074 2079 Deltas will always be reused (if possible), even if the destination
2075 2080 revlog would not select the same revisions for the delta. This is the
2076 2081 fastest mode of operation.
2077 2082 DELTAREUSESAMEREVS
2078 2083 Deltas will be reused if the destination revlog would pick the same
2079 2084 revisions for the delta. This mode strikes a balance between speed
2080 2085 and optimization.
2081 2086 DELTAREUSENEVER
2082 2087 Deltas will never be reused. This is the slowest mode of execution.
2083 2088 This mode can be used to recompute deltas (e.g. if the diff/delta
2084 2089 algorithm changes).
2085 2090
2086 2091 Delta computation can be slow, so the choice of delta reuse policy can
2087 2092 significantly affect run time.
2088 2093
2089 2094 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2090 2095 two extremes. Deltas will be reused if they are appropriate. But if the
2091 2096 delta could choose a better revision, it will do so. This means if you
2092 2097 are converting a non-generaldelta revlog to a generaldelta revlog,
2093 2098 deltas will be recomputed if the delta's parent isn't a parent of the
2094 2099 revision.
2095 2100
2096 2101 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2097 2102 controls whether to compute deltas against both parents for merges.
2098 2103         If not specified, the destination revlog's current setting is kept.
2099 2104 """
2100 2105 if deltareuse not in self.DELTAREUSEALL:
2101 2106 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2102 2107
2103 2108 if len(destrevlog):
2104 2109 raise ValueError(_('destination revlog is not empty'))
2105 2110
2106 2111 if getattr(self, 'filteredrevs', None):
2107 2112 raise ValueError(_('source revlog has filtered revisions'))
2108 2113 if getattr(destrevlog, 'filteredrevs', None):
2109 2114 raise ValueError(_('destination revlog has filtered revisions'))
2110 2115
2111 2116 # lazydeltabase controls whether to reuse a cached delta, if possible.
2112 2117 oldlazydeltabase = destrevlog._lazydeltabase
2113 2118 oldamd = destrevlog._aggressivemergedeltas
2114 2119
2115 2120 try:
2116 2121 if deltareuse == self.DELTAREUSEALWAYS:
2117 2122 destrevlog._lazydeltabase = True
2118 2123 elif deltareuse == self.DELTAREUSESAMEREVS:
2119 2124 destrevlog._lazydeltabase = False
2120 2125
2121 2126 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2122 2127
2123 2128 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2124 2129 self.DELTAREUSESAMEREVS)
2125 2130
2126 2131 index = self.index
2127 2132 for rev in self:
2128 2133 entry = index[rev]
2129 2134
2130 2135 # Some classes override linkrev to take filtered revs into
2131 2136 # account. Use raw entry from index.
2132 2137 flags = entry[0] & 0xffff
2133 2138 linkrev = entry[4]
2134 2139 p1 = index[entry[5]][7]
2135 2140 p2 = index[entry[6]][7]
2136 2141 node = entry[7]
2137 2142
2138 2143 # (Possibly) reuse the delta from the revlog if allowed and
2139 2144 # the revlog chunk is a delta.
2140 2145 cachedelta = None
2141 2146 rawtext = None
2142 2147 if populatecachedelta:
2143 2148 dp = self.deltaparent(rev)
2144 2149 if dp != nullrev:
2145 2150 cachedelta = (dp, str(self._chunk(rev)))
2146 2151
2147 2152 if not cachedelta:
2148 2153 rawtext = self.revision(rev, raw=True)
2149 2154
2150 2155 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2151 2156 checkambig=False)
2152 2157 dfh = None
2153 2158 if not destrevlog._inline:
2154 2159 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2155 2160 try:
2156 2161 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
2157 2162 flags, cachedelta, ifh, dfh)
2158 2163 finally:
2159 2164 if dfh:
2160 2165 dfh.close()
2161 2166 ifh.close()
2162 2167
2163 2168 if addrevisioncb:
2164 2169 addrevisioncb(self, rev, node)
2165 2170 finally:
2166 2171 destrevlog._lazydeltabase = oldlazydeltabase
2167 2172 destrevlog._aggressivemergedeltas = oldamd
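
As a rough usage sketch (hypothetical file names; an open vfs 'opener' and transaction 'tr' are assumed), forcing every delta to be recomputed while copying looks like:

    src = revlog(opener, 'data/foo.i')
    dst = revlog(opener, 'data/foo.rewritten.i')     # must start empty
    src.clone(tr, dst, deltareuse=revlog.DELTAREUSENEVER)
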
@@ -1,994 +1,996
1 1 # scmutil.py - Mercurial core utility functions
2 2 #
3 3 # Copyright Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import glob
12 12 import hashlib
13 13 import os
14 14 import re
15 15 import socket
16 16
17 17 from .i18n import _
18 18 from .node import (
19 19 wdirid,
20 20 wdirrev,
21 21 )
22 22
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 pathutil,
28 28 phases,
29 29 pycompat,
30 30 revsetlang,
31 31 similar,
32 32 util,
33 33 )
34 34
35 35 if pycompat.osname == 'nt':
36 36 from . import scmwindows as scmplatform
37 37 else:
38 38 from . import scmposix as scmplatform
39 39
40 40 termsize = scmplatform.termsize
41 41
42 42 class status(tuple):
43 43 '''Named tuple with a list of files per status. The 'deleted', 'unknown'
44 44 and 'ignored' properties are only relevant to the working copy.
45 45 '''
46 46
47 47 __slots__ = ()
48 48
49 49 def __new__(cls, modified, added, removed, deleted, unknown, ignored,
50 50 clean):
51 51 return tuple.__new__(cls, (modified, added, removed, deleted, unknown,
52 52 ignored, clean))
53 53
54 54 @property
55 55 def modified(self):
56 56 '''files that have been modified'''
57 57 return self[0]
58 58
59 59 @property
60 60 def added(self):
61 61 '''files that have been added'''
62 62 return self[1]
63 63
64 64 @property
65 65 def removed(self):
66 66 '''files that have been removed'''
67 67 return self[2]
68 68
69 69 @property
70 70 def deleted(self):
71 71 '''files that are in the dirstate, but have been deleted from the
72 72 working copy (aka "missing")
73 73 '''
74 74 return self[3]
75 75
76 76 @property
77 77 def unknown(self):
78 78 '''files not in the dirstate that are not ignored'''
79 79 return self[4]
80 80
81 81 @property
82 82 def ignored(self):
83 83 '''files not in the dirstate that are ignored (by _dirignore())'''
84 84 return self[5]
85 85
86 86 @property
87 87 def clean(self):
88 88 '''files that have not been modified'''
89 89 return self[6]
90 90
91 91 def __repr__(self, *args, **kwargs):
92 92 return (('<status modified=%r, added=%r, removed=%r, deleted=%r, '
93 93 'unknown=%r, ignored=%r, clean=%r>') % self)
94 94
95 95 def itersubrepos(ctx1, ctx2):
96 96 """find subrepos in ctx1 or ctx2"""
97 97 # Create a (subpath, ctx) mapping where we prefer subpaths from
98 98 # ctx1. The subpaths from ctx2 are important when the .hgsub file
99 99 # has been modified (in ctx2) but not yet committed (in ctx1).
100 100 subpaths = dict.fromkeys(ctx2.substate, ctx2)
101 101 subpaths.update(dict.fromkeys(ctx1.substate, ctx1))
102 102
103 103 missing = set()
104 104
105 105 for subpath in ctx2.substate:
106 106 if subpath not in ctx1.substate:
107 107 del subpaths[subpath]
108 108 missing.add(subpath)
109 109
110 110 for subpath, ctx in sorted(subpaths.iteritems()):
111 111 yield subpath, ctx.sub(subpath)
112 112
113 113 # Yield an empty subrepo based on ctx1 for anything only in ctx2. That way,
114 114 # status and diff will have an accurate result when it does
115 115 # 'sub.{status|diff}(rev2)'. Otherwise, the ctx2 subrepo is compared
116 116 # against itself.
117 117 for subpath in missing:
118 118 yield subpath, ctx2.nullsub(subpath, ctx1)
119 119
120 120 def nochangesfound(ui, repo, excluded=None):
121 121 '''Report no changes for push/pull, excluded is None or a list of
122 122 nodes excluded from the push/pull.
123 123 '''
124 124 secretlist = []
125 125 if excluded:
126 126 for n in excluded:
127 127 ctx = repo[n]
128 128 if ctx.phase() >= phases.secret and not ctx.extinct():
129 129 secretlist.append(n)
130 130
131 131 if secretlist:
132 132 ui.status(_("no changes found (ignored %d secret changesets)\n")
133 133 % len(secretlist))
134 134 else:
135 135 ui.status(_("no changes found\n"))
136 136
137 137 def callcatch(ui, func):
138 138 """call func() with global exception handling
139 139
140 140 return func() if no exception happens. otherwise do some error handling
141 141 and return an exit code accordingly. does not handle all exceptions.
142 142 """
143 143 try:
144 144 try:
145 145 return func()
146 146 except: # re-raises
147 147 ui.traceback()
148 148 raise
149 149 # Global exception handling, alphabetically
150 150 # Mercurial-specific first, followed by built-in and library exceptions
151 151 except error.LockHeld as inst:
152 152 if inst.errno == errno.ETIMEDOUT:
153 153 reason = _('timed out waiting for lock held by %r') % inst.locker
154 154 else:
155 155 reason = _('lock held by %r') % inst.locker
156 156 ui.warn(_("abort: %s: %s\n") % (inst.desc or inst.filename, reason))
157 157 if not inst.locker:
158 158 ui.warn(_("(lock might be very busy)\n"))
159 159 except error.LockUnavailable as inst:
160 160 ui.warn(_("abort: could not lock %s: %s\n") %
161 161 (inst.desc or inst.filename, inst.strerror))
162 162 except error.OutOfBandError as inst:
163 163 if inst.args:
164 164 msg = _("abort: remote error:\n")
165 165 else:
166 166 msg = _("abort: remote error\n")
167 167 ui.warn(msg)
168 168 if inst.args:
169 169 ui.warn(''.join(inst.args))
170 170 if inst.hint:
171 171 ui.warn('(%s)\n' % inst.hint)
172 172 except error.RepoError as inst:
173 173 ui.warn(_("abort: %s!\n") % inst)
174 174 if inst.hint:
175 175 ui.warn(_("(%s)\n") % inst.hint)
176 176 except error.ResponseError as inst:
177 177 ui.warn(_("abort: %s") % inst.args[0])
178 178 if not isinstance(inst.args[1], basestring):
179 179 ui.warn(" %r\n" % (inst.args[1],))
180 180 elif not inst.args[1]:
181 181 ui.warn(_(" empty string\n"))
182 182 else:
183 183 ui.warn("\n%r\n" % util.ellipsis(inst.args[1]))
184 184 except error.CensoredNodeError as inst:
185 185 ui.warn(_("abort: file censored %s!\n") % inst)
186 186 except error.RevlogError as inst:
187 187 ui.warn(_("abort: %s!\n") % inst)
188 188 except error.InterventionRequired as inst:
189 189 ui.warn("%s\n" % inst)
190 190 if inst.hint:
191 191 ui.warn(_("(%s)\n") % inst.hint)
192 192 return 1
193 except error.WdirUnsupported:
194 ui.warn(_("abort: working directory revision cannot be specified\n"))
193 195 except error.Abort as inst:
194 196 ui.warn(_("abort: %s\n") % inst)
195 197 if inst.hint:
196 198 ui.warn(_("(%s)\n") % inst.hint)
197 199 except ImportError as inst:
198 200 ui.warn(_("abort: %s!\n") % inst)
199 201 m = str(inst).split()[-1]
200 202 if m in "mpatch bdiff".split():
201 203 ui.warn(_("(did you forget to compile extensions?)\n"))
202 204 elif m in "zlib".split():
203 205 ui.warn(_("(is your Python install correct?)\n"))
204 206 except IOError as inst:
205 207 if util.safehasattr(inst, "code"):
206 208 ui.warn(_("abort: %s\n") % inst)
207 209 elif util.safehasattr(inst, "reason"):
208 210 try: # usually it is in the form (errno, strerror)
209 211 reason = inst.reason.args[1]
210 212 except (AttributeError, IndexError):
211 213 # it might be anything, for example a string
212 214 reason = inst.reason
213 215 if isinstance(reason, unicode):
214 216 # SSLError of Python 2.7.9 contains a unicode
215 217 reason = encoding.unitolocal(reason)
216 218 ui.warn(_("abort: error: %s\n") % reason)
217 219 elif (util.safehasattr(inst, "args")
218 220 and inst.args and inst.args[0] == errno.EPIPE):
219 221 pass
220 222 elif getattr(inst, "strerror", None):
221 223 if getattr(inst, "filename", None):
222 224 ui.warn(_("abort: %s: %s\n") % (inst.strerror, inst.filename))
223 225 else:
224 226 ui.warn(_("abort: %s\n") % inst.strerror)
225 227 else:
226 228 raise
227 229 except OSError as inst:
228 230 if getattr(inst, "filename", None) is not None:
229 231 ui.warn(_("abort: %s: '%s'\n") % (inst.strerror, inst.filename))
230 232 else:
231 233 ui.warn(_("abort: %s\n") % inst.strerror)
232 234 except MemoryError:
233 235 ui.warn(_("abort: out of memory\n"))
234 236 except SystemExit as inst:
235 237 # Commands shouldn't sys.exit directly, but give a return code.
236 238         # Just in case, catch this and pass the exit code to the caller.
237 239 return inst.code
238 240 except socket.error as inst:
239 241 ui.warn(_("abort: %s\n") % inst.args[-1])
240 242
241 243 return -1
242 244
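The command dispatch layer funnels execution through this function so exceptions become exit codes; a minimal sketch (a 'ui' object is assumed) of how the new WdirUnsupported case reaches the user:

    from mercurial import error, scmutil

    def _cmd():
        # e.g. something that ends up asking for rev(wdirid) where the
        # working directory cannot be represented
        raise error.WdirUnsupported()

    ret = scmutil.callcatch(ui, _cmd)
    # prints "abort: working directory revision cannot be specified"
    # and returns -1 instead of propagating the exception
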
243 245 def checknewlabel(repo, lbl, kind):
244 246 # Do not use the "kind" parameter in ui output.
245 247 # It makes strings difficult to translate.
246 248 if lbl in ['tip', '.', 'null']:
247 249 raise error.Abort(_("the name '%s' is reserved") % lbl)
248 250 for c in (':', '\0', '\n', '\r'):
249 251 if c in lbl:
250 252 raise error.Abort(_("%r cannot be used in a name") % c)
251 253 try:
252 254 int(lbl)
253 255 raise error.Abort(_("cannot use an integer as a name"))
254 256 except ValueError:
255 257 pass
256 258
257 259 def checkfilename(f):
258 260 '''Check that the filename f is an acceptable filename for a tracked file'''
259 261 if '\r' in f or '\n' in f:
260 262 raise error.Abort(_("'\\n' and '\\r' disallowed in filenames: %r") % f)
261 263
262 264 def checkportable(ui, f):
263 265 '''Check if filename f is portable and warn or abort depending on config'''
264 266 checkfilename(f)
265 267 abort, warn = checkportabilityalert(ui)
266 268 if abort or warn:
267 269 msg = util.checkwinfilename(f)
268 270 if msg:
269 271 msg = "%s: %r" % (msg, f)
270 272 if abort:
271 273 raise error.Abort(msg)
272 274 ui.warn(_("warning: %s\n") % msg)
273 275
274 276 def checkportabilityalert(ui):
275 277 '''check if the user's config requests nothing, a warning, or abort for
276 278 non-portable filenames'''
277 279 val = ui.config('ui', 'portablefilenames', 'warn')
278 280 lval = val.lower()
279 281 bval = util.parsebool(val)
280 282 abort = pycompat.osname == 'nt' or lval == 'abort'
281 283 warn = bval or lval == 'warn'
282 284 if bval is None and not (warn or abort or lval == 'ignore'):
283 285 raise error.ConfigError(
284 286 _("ui.portablefilenames value is invalid ('%s')") % val)
285 287 return abort, warn
286 288
287 289 class casecollisionauditor(object):
288 290 def __init__(self, ui, abort, dirstate):
289 291 self._ui = ui
290 292 self._abort = abort
291 293 allfiles = '\0'.join(dirstate._map)
292 294 self._loweredfiles = set(encoding.lower(allfiles).split('\0'))
293 295 self._dirstate = dirstate
294 296 # The purpose of _newfiles is so that we don't complain about
295 297 # case collisions if someone were to call this object with the
296 298 # same filename twice.
297 299 self._newfiles = set()
298 300
299 301 def __call__(self, f):
300 302 if f in self._newfiles:
301 303 return
302 304 fl = encoding.lower(f)
303 305 if fl in self._loweredfiles and f not in self._dirstate:
304 306 msg = _('possible case-folding collision for %s') % f
305 307 if self._abort:
306 308 raise error.Abort(msg)
307 309 self._ui.warn(_("warning: %s\n") % msg)
308 310 self._loweredfiles.add(fl)
309 311 self._newfiles.add(f)
310 312
311 313 def filteredhash(repo, maxrev):
312 314 """build hash of filtered revisions in the current repoview.
313 315
314 316 Multiple caches perform up-to-date validation by checking that the
315 317 tiprev and tipnode stored in the cache file match the current repository.
316 318 However, this is not sufficient for validating repoviews because the set
317 319 of revisions in the view may change without the repository tiprev and
318 320 tipnode changing.
319 321
320 322 This function hashes all the revs filtered from the view and returns
321 323 that SHA-1 digest.
322 324 """
323 325 cl = repo.changelog
324 326 if not cl.filteredrevs:
325 327 return None
326 328 key = None
327 329 revs = sorted(r for r in cl.filteredrevs if r <= maxrev)
328 330 if revs:
329 331 s = hashlib.sha1()
330 332 for rev in revs:
331 333 s.update('%d;' % rev)
332 334 key = s.digest()
333 335 return key
334 336
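A standalone sketch of the digest built above, assuming a view that filters revisions 2 and 5 and a cache valid up to maxrev 10:

    import hashlib

    filteredrevs, maxrev = {2, 5}, 10
    s = hashlib.sha1()
    for rev in sorted(r for r in filteredrevs if r <= maxrev):
        s.update('%d;' % rev)
    key = s.digest()   # compared with the stored key to validate the cache
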
335 337 def walkrepos(path, followsym=False, seen_dirs=None, recurse=False):
336 338 '''yield every hg repository under path, always recursively.
337 339 The recurse flag will only control recursion into repo working dirs'''
338 340 def errhandler(err):
339 341 if err.filename == path:
340 342 raise err
341 343 samestat = getattr(os.path, 'samestat', None)
342 344 if followsym and samestat is not None:
343 345 def adddir(dirlst, dirname):
344 346 match = False
345 347 dirstat = os.stat(dirname)
346 348 for lstdirstat in dirlst:
347 349 if samestat(dirstat, lstdirstat):
348 350 match = True
349 351 break
350 352 if not match:
351 353 dirlst.append(dirstat)
352 354 return not match
353 355 else:
354 356 followsym = False
355 357
356 358 if (seen_dirs is None) and followsym:
357 359 seen_dirs = []
358 360 adddir(seen_dirs, path)
359 361 for root, dirs, files in os.walk(path, topdown=True, onerror=errhandler):
360 362 dirs.sort()
361 363 if '.hg' in dirs:
362 364 yield root # found a repository
363 365 qroot = os.path.join(root, '.hg', 'patches')
364 366 if os.path.isdir(os.path.join(qroot, '.hg')):
365 367 yield qroot # we have a patch queue repo here
366 368 if recurse:
367 369 # avoid recursing inside the .hg directory
368 370 dirs.remove('.hg')
369 371 else:
370 372 dirs[:] = [] # don't descend further
371 373 elif followsym:
372 374 newdirs = []
373 375 for d in dirs:
374 376 fname = os.path.join(root, d)
375 377 if adddir(seen_dirs, fname):
376 378 if os.path.islink(fname):
377 379 for hgname in walkrepos(fname, True, seen_dirs):
378 380 yield hgname
379 381 else:
380 382 newdirs.append(d)
381 383 dirs[:] = newdirs
382 384
383 385 def binnode(ctx):
384 386 """Return binary node id for a given basectx"""
385 387 node = ctx.node()
386 388 if node is None:
387 389 return wdirid
388 390 return node
389 391
390 392 def intrev(ctx):
391 393 """Return integer for a given basectx that can be used in comparison or
392 394 arithmetic operation"""
393 395 rev = ctx.rev()
394 396 if rev is None:
395 397 return wdirrev
396 398 return rev
397 399
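Both helpers exist because the working-directory context reports None for rev() and node(); a small sketch (an open 'repo' is assumed) of the substitution:

    from mercurial import scmutil
    from mercurial.node import wdirid, wdirrev

    wctx = repo[None]                        # working directory context
    assert scmutil.intrev(wctx) == wdirrev   # 0x7fffffff, safe for %d and sorting
    assert scmutil.binnode(wctx) == wdirid   # 20-byte pseudo node id
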
398 400 def revsingle(repo, revspec, default='.'):
399 401 if not revspec and revspec != 0:
400 402 return repo[default]
401 403
402 404 l = revrange(repo, [revspec])
403 405 if not l:
404 406 raise error.Abort(_('empty revision set'))
405 407 return repo[l.last()]
406 408
407 409 def _pairspec(revspec):
408 410 tree = revsetlang.parse(revspec)
409 411 return tree and tree[0] in ('range', 'rangepre', 'rangepost', 'rangeall')
410 412
411 413 def revpair(repo, revs):
412 414 if not revs:
413 415 return repo.dirstate.p1(), None
414 416
415 417 l = revrange(repo, revs)
416 418
417 419 if not l:
418 420 first = second = None
419 421 elif l.isascending():
420 422 first = l.min()
421 423 second = l.max()
422 424 elif l.isdescending():
423 425 first = l.max()
424 426 second = l.min()
425 427 else:
426 428 first = l.first()
427 429 second = l.last()
428 430
429 431 if first is None:
430 432 raise error.Abort(_('empty revision range'))
431 433 if (first == second and len(revs) >= 2
432 434 and not all(revrange(repo, [r]) for r in revs)):
433 435 raise error.Abort(_('empty revision on one side of range'))
434 436
435 437 # if top-level is range expression, the result must always be a pair
436 438 if first == second and len(revs) == 1 and not _pairspec(revs[0]):
437 439 return repo.lookup(first), None
438 440
439 441 return repo.lookup(first), repo.lookup(second)
440 442
441 443 def revrange(repo, specs):
442 444 """Execute 1 to many revsets and return the union.
443 445
444 446 This is the preferred mechanism for executing revsets using user-specified
445 447 config options, such as revset aliases.
446 448
447 449 The revsets specified by ``specs`` will be executed via a chained ``OR``
448 450 expression. If ``specs`` is empty, an empty result is returned.
449 451
450 452 ``specs`` can contain integers, in which case they are assumed to be
451 453 revision numbers.
452 454
453 455 It is assumed the revsets are already formatted. If you have arguments
454 456 that need to be expanded in the revset, call ``revsetlang.formatspec()``
455 457 and pass the result as an element of ``specs``.
456 458
457 459 Specifying a single revset is allowed.
458 460
459 461 Returns a ``revset.abstractsmartset`` which is a list-like interface over
460 462 integer revisions.
461 463 """
462 464 allspecs = []
463 465 for spec in specs:
464 466 if isinstance(spec, int):
465 467 spec = revsetlang.formatspec('rev(%d)', spec)
466 468 allspecs.append(spec)
467 469 return repo.anyrevs(allspecs, user=True)
468 470
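A short sketch of the recommended pattern ('repo' and the branch name are assumed): quote user input with revsetlang.formatspec() and pass the results here:

    from mercurial import revsetlang, scmutil

    branch = 'stable'                                  # hypothetical input
    specs = [revsetlang.formatspec('branch(%s)', branch), 'tip']
    revs = scmutil.revrange(repo, specs)               # union of both revsets
    # revs is a smartset; iterate it, or use revs.min(), revs.last(), ...
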
469 471 def meaningfulparents(repo, ctx):
470 472 """Return list of meaningful (or all if debug) parentrevs for rev.
471 473
472 474 For merges (two non-nullrev revisions) both parents are meaningful.
473 475 Otherwise the first parent revision is considered meaningful if it
474 476 is not the preceding revision.
475 477 """
476 478 parents = ctx.parents()
477 479 if len(parents) > 1:
478 480 return parents
479 481 if repo.ui.debugflag:
480 482 return [parents[0], repo['null']]
481 483 if parents[0].rev() >= intrev(ctx) - 1:
482 484 return []
483 485 return parents
484 486
485 487 def expandpats(pats):
486 488 '''Expand bare globs when running on windows.
487 489     On posix we assume it has already been done by sh.'''
488 490 if not util.expandglobs:
489 491 return list(pats)
490 492 ret = []
491 493 for kindpat in pats:
492 494 kind, pat = matchmod._patsplit(kindpat, None)
493 495 if kind is None:
494 496 try:
495 497 globbed = glob.glob(pat)
496 498 except re.error:
497 499 globbed = [pat]
498 500 if globbed:
499 501 ret.extend(globbed)
500 502 continue
501 503 ret.append(kindpat)
502 504 return ret
503 505
504 506 def matchandpats(ctx, pats=(), opts=None, globbed=False, default='relpath',
505 507 badfn=None):
506 508 '''Return a matcher and the patterns that were used.
507 509 The matcher will warn about bad matches, unless an alternate badfn callback
508 510 is provided.'''
509 511 if pats == ("",):
510 512 pats = []
511 513 if opts is None:
512 514 opts = {}
513 515 if not globbed and default == 'relpath':
514 516 pats = expandpats(pats or [])
515 517
516 518 def bad(f, msg):
517 519 ctx.repo().ui.warn("%s: %s\n" % (m.rel(f), msg))
518 520
519 521 if badfn is None:
520 522 badfn = bad
521 523
522 524 m = ctx.match(pats, opts.get('include'), opts.get('exclude'),
523 525 default, listsubrepos=opts.get('subrepos'), badfn=badfn)
524 526
525 527 if m.always():
526 528 pats = []
527 529 return m, pats
528 530
529 531 def match(ctx, pats=(), opts=None, globbed=False, default='relpath',
530 532 badfn=None):
531 533 '''Return a matcher that will warn about bad matches.'''
532 534 return matchandpats(ctx, pats, opts, globbed, default, badfn=badfn)[0]
533 535
534 536 def matchall(repo):
535 537 '''Return a matcher that will efficiently match everything.'''
536 538 return matchmod.always(repo.root, repo.getcwd())
537 539
538 540 def matchfiles(repo, files, badfn=None):
539 541 '''Return a matcher that will efficiently match exactly these files.'''
540 542 return matchmod.exact(repo.root, repo.getcwd(), files, badfn=badfn)
541 543
542 544 def origpath(ui, repo, filepath):
543 545 '''customize where .orig files are created
544 546
545 547 Fetch user defined path from config file: [ui] origbackuppath = <path>
546 548 Fall back to default (filepath) if not specified
547 549 '''
548 550 origbackuppath = ui.config('ui', 'origbackuppath', None)
549 551 if origbackuppath is None:
550 552 return filepath + ".orig"
551 553
552 554 filepathfromroot = os.path.relpath(filepath, start=repo.root)
553 555 fullorigpath = repo.wjoin(origbackuppath, filepathfromroot)
554 556
555 557 origbackupdir = repo.vfs.dirname(fullorigpath)
556 558 if not repo.vfs.exists(origbackupdir):
557 559 ui.note(_('creating directory: %s\n') % origbackupdir)
558 560 util.makedirs(origbackupdir)
559 561
560 562 return fullorigpath + ".orig"
561 563
562 564 def addremove(repo, matcher, prefix, opts=None, dry_run=None, similarity=None):
563 565 if opts is None:
564 566 opts = {}
565 567 m = matcher
566 568 if dry_run is None:
567 569 dry_run = opts.get('dry_run')
568 570 if similarity is None:
569 571 similarity = float(opts.get('similarity') or 0)
570 572
571 573 ret = 0
572 574 join = lambda f: os.path.join(prefix, f)
573 575
574 576 wctx = repo[None]
575 577 for subpath in sorted(wctx.substate):
576 578 submatch = matchmod.subdirmatcher(subpath, m)
577 579 if opts.get('subrepos') or m.exact(subpath) or any(submatch.files()):
578 580 sub = wctx.sub(subpath)
579 581 try:
580 582 if sub.addremove(submatch, prefix, opts, dry_run, similarity):
581 583 ret = 1
582 584 except error.LookupError:
583 585 repo.ui.status(_("skipping missing subrepository: %s\n")
584 586 % join(subpath))
585 587
586 588 rejected = []
587 589 def badfn(f, msg):
588 590 if f in m.files():
589 591 m.bad(f, msg)
590 592 rejected.append(f)
591 593
592 594 badmatch = matchmod.badmatch(m, badfn)
593 595 added, unknown, deleted, removed, forgotten = _interestingfiles(repo,
594 596 badmatch)
595 597
596 598 unknownset = set(unknown + forgotten)
597 599 toprint = unknownset.copy()
598 600 toprint.update(deleted)
599 601 for abs in sorted(toprint):
600 602 if repo.ui.verbose or not m.exact(abs):
601 603 if abs in unknownset:
602 604 status = _('adding %s\n') % m.uipath(abs)
603 605 else:
604 606 status = _('removing %s\n') % m.uipath(abs)
605 607 repo.ui.status(status)
606 608
607 609 renames = _findrenames(repo, m, added + unknown, removed + deleted,
608 610 similarity)
609 611
610 612 if not dry_run:
611 613 _markchanges(repo, unknown + forgotten, deleted, renames)
612 614
613 615 for f in rejected:
614 616 if f in m.files():
615 617 return 1
616 618 return ret
617 619
618 620 def marktouched(repo, files, similarity=0.0):
619 621 '''Assert that files have somehow been operated upon. files are relative to
620 622 the repo root.'''
621 623 m = matchfiles(repo, files, badfn=lambda x, y: rejected.append(x))
622 624 rejected = []
623 625
624 626 added, unknown, deleted, removed, forgotten = _interestingfiles(repo, m)
625 627
626 628 if repo.ui.verbose:
627 629 unknownset = set(unknown + forgotten)
628 630 toprint = unknownset.copy()
629 631 toprint.update(deleted)
630 632 for abs in sorted(toprint):
631 633 if abs in unknownset:
632 634 status = _('adding %s\n') % abs
633 635 else:
634 636 status = _('removing %s\n') % abs
635 637 repo.ui.status(status)
636 638
637 639 renames = _findrenames(repo, m, added + unknown, removed + deleted,
638 640 similarity)
639 641
640 642 _markchanges(repo, unknown + forgotten, deleted, renames)
641 643
642 644 for f in rejected:
643 645 if f in m.files():
644 646 return 1
645 647 return 0
646 648
647 649 def _interestingfiles(repo, matcher):
648 650 '''Walk dirstate with matcher, looking for files that addremove would care
649 651 about.
650 652
651 653 This is different from dirstate.status because it doesn't care about
652 654 whether files are modified or clean.'''
653 655 added, unknown, deleted, removed, forgotten = [], [], [], [], []
654 656 audit_path = pathutil.pathauditor(repo.root)
655 657
656 658 ctx = repo[None]
657 659 dirstate = repo.dirstate
658 660 walkresults = dirstate.walk(matcher, sorted(ctx.substate), True, False,
659 661 full=False)
660 662 for abs, st in walkresults.iteritems():
661 663 dstate = dirstate[abs]
662 664 if dstate == '?' and audit_path.check(abs):
663 665 unknown.append(abs)
664 666 elif dstate != 'r' and not st:
665 667 deleted.append(abs)
666 668 elif dstate == 'r' and st:
667 669 forgotten.append(abs)
668 670 # for finding renames
669 671 elif dstate == 'r' and not st:
670 672 removed.append(abs)
671 673 elif dstate == 'a':
672 674 added.append(abs)
673 675
674 676 return added, unknown, deleted, removed, forgotten
675 677
676 678 def _findrenames(repo, matcher, added, removed, similarity):
677 679 '''Find renames from removed files to added ones.'''
678 680 renames = {}
679 681 if similarity > 0:
680 682 for old, new, score in similar.findrenames(repo, added, removed,
681 683 similarity):
682 684 if (repo.ui.verbose or not matcher.exact(old)
683 685 or not matcher.exact(new)):
684 686 repo.ui.status(_('recording removal of %s as rename to %s '
685 687 '(%d%% similar)\n') %
686 688 (matcher.rel(old), matcher.rel(new),
687 689 score * 100))
688 690 renames[new] = old
689 691 return renames
690 692
691 693 def _markchanges(repo, unknown, deleted, renames):
692 694 '''Marks the files in unknown as added, the files in deleted as removed,
693 695 and the files in renames as copied.'''
694 696 wctx = repo[None]
695 697 with repo.wlock():
696 698 wctx.forget(deleted)
697 699 wctx.add(unknown)
698 700 for new, old in renames.iteritems():
699 701 wctx.copy(old, new)
700 702
701 703 def dirstatecopy(ui, repo, wctx, src, dst, dryrun=False, cwd=None):
702 704 """Update the dirstate to reflect the intent of copying src to dst. For
703 705 different reasons it might not end with dst being marked as copied from src.
704 706 """
705 707 origsrc = repo.dirstate.copied(src) or src
706 708 if dst == origsrc: # copying back a copy?
707 709 if repo.dirstate[dst] not in 'mn' and not dryrun:
708 710 repo.dirstate.normallookup(dst)
709 711 else:
710 712 if repo.dirstate[origsrc] == 'a' and origsrc == src:
711 713 if not ui.quiet:
712 714 ui.warn(_("%s has not been committed yet, so no copy "
713 715 "data will be stored for %s.\n")
714 716 % (repo.pathto(origsrc, cwd), repo.pathto(dst, cwd)))
715 717 if repo.dirstate[dst] in '?r' and not dryrun:
716 718 wctx.add([dst])
717 719 elif not dryrun:
718 720 wctx.copy(origsrc, dst)
719 721
720 722 def readrequires(opener, supported):
721 723 '''Reads and parses .hg/requires and checks if all entries found
722 724 are in the list of supported features.'''
723 725 requirements = set(opener.read("requires").splitlines())
724 726 missings = []
725 727 for r in requirements:
726 728 if r not in supported:
727 729 if not r or not r[0].isalnum():
728 730 raise error.RequirementError(_(".hg/requires file is corrupt"))
729 731 missings.append(r)
730 732 missings.sort()
731 733 if missings:
732 734 raise error.RequirementError(
733 735 _("repository requires features unknown to this Mercurial: %s")
734 736 % " ".join(missings),
735 737 hint=_("see https://mercurial-scm.org/wiki/MissingRequirement"
736 738 " for more information"))
737 739 return requirements
738 740
739 741 def writerequires(opener, requirements):
740 742 with opener('requires', 'w') as fp:
741 743 for r in sorted(requirements):
742 744 fp.write("%s\n" % r)
743 745
744 746 class filecachesubentry(object):
745 747 def __init__(self, path, stat):
746 748 self.path = path
747 749 self.cachestat = None
748 750 self._cacheable = None
749 751
750 752 if stat:
751 753 self.cachestat = filecachesubentry.stat(self.path)
752 754
753 755 if self.cachestat:
754 756 self._cacheable = self.cachestat.cacheable()
755 757 else:
756 758 # None means we don't know yet
757 759 self._cacheable = None
758 760
759 761 def refresh(self):
760 762 if self.cacheable():
761 763 self.cachestat = filecachesubentry.stat(self.path)
762 764
763 765 def cacheable(self):
764 766 if self._cacheable is not None:
765 767 return self._cacheable
766 768
767 769 # we don't know yet, assume it is for now
768 770 return True
769 771
770 772 def changed(self):
771 773 # no point in going further if we can't cache it
772 774 if not self.cacheable():
773 775 return True
774 776
775 777 newstat = filecachesubentry.stat(self.path)
776 778
777 779 # we may not know if it's cacheable yet, check again now
778 780 if newstat and self._cacheable is None:
779 781 self._cacheable = newstat.cacheable()
780 782
781 783 # check again
782 784 if not self._cacheable:
783 785 return True
784 786
785 787 if self.cachestat != newstat:
786 788 self.cachestat = newstat
787 789 return True
788 790 else:
789 791 return False
790 792
791 793 @staticmethod
792 794 def stat(path):
793 795 try:
794 796 return util.cachestat(path)
795 797 except OSError as e:
796 798 if e.errno != errno.ENOENT:
797 799 raise
798 800
799 801 class filecacheentry(object):
800 802 def __init__(self, paths, stat=True):
801 803 self._entries = []
802 804 for path in paths:
803 805 self._entries.append(filecachesubentry(path, stat))
804 806
805 807 def changed(self):
806 808 '''true if any entry has changed'''
807 809 for entry in self._entries:
808 810 if entry.changed():
809 811 return True
810 812 return False
811 813
812 814 def refresh(self):
813 815 for entry in self._entries:
814 816 entry.refresh()
815 817
816 818 class filecache(object):
817 819 '''A property like decorator that tracks files under .hg/ for updates.
818 820
819 821 Records stat info when called in _filecache.
820 822
821 823 On subsequent calls, compares old stat info with new info, and recreates the
822 824 object when any of the files changes, updating the new stat info in
823 825 _filecache.
824 826
825 827     Mercurial either atomically renames or appends to files under .hg,
826 828     so to ensure the cache is reliable we need the filesystem to be able
827 829     to tell us if a file has been replaced. If it can't, we fall back to
828 830 recreating the object on every call (essentially the same behavior as
829 831 propertycache).
830 832
831 833 '''
832 834 def __init__(self, *paths):
833 835 self.paths = paths
834 836
835 837 def join(self, obj, fname):
836 838 """Used to compute the runtime path of a cached file.
837 839
838 840 Users should subclass filecache and provide their own version of this
839 841 function to call the appropriate join function on 'obj' (an instance
840 842 of the class that its member function was decorated).
841 843 """
842 844 raise NotImplementedError
843 845
844 846 def __call__(self, func):
845 847 self.func = func
846 848 self.name = func.__name__.encode('ascii')
847 849 return self
848 850
849 851 def __get__(self, obj, type=None):
850 852 # if accessed on the class, return the descriptor itself.
851 853 if obj is None:
852 854 return self
853 855 # do we need to check if the file changed?
854 856 if self.name in obj.__dict__:
855 857 assert self.name in obj._filecache, self.name
856 858 return obj.__dict__[self.name]
857 859
858 860 entry = obj._filecache.get(self.name)
859 861
860 862 if entry:
861 863 if entry.changed():
862 864 entry.obj = self.func(obj)
863 865 else:
864 866 paths = [self.join(obj, path) for path in self.paths]
865 867
866 868 # We stat -before- creating the object so our cache doesn't lie if
867 869         # a writer modified the file between the time we read and stat it
868 870 entry = filecacheentry(paths, True)
869 871 entry.obj = self.func(obj)
870 872
871 873 obj._filecache[self.name] = entry
872 874
873 875 obj.__dict__[self.name] = entry.obj
874 876 return entry.obj
875 877
876 878 def __set__(self, obj, value):
877 879 if self.name not in obj._filecache:
878 880 # we add an entry for the missing value because X in __dict__
879 881 # implies X in _filecache
880 882 paths = [self.join(obj, path) for path in self.paths]
881 883 ce = filecacheentry(paths, False)
882 884 obj._filecache[self.name] = ce
883 885 else:
884 886 ce = obj._filecache[self.name]
885 887
886 888 ce.obj = value # update cached copy
887 889 obj.__dict__[self.name] = value # update copy returned by obj.x
888 890
889 891 def __delete__(self, obj):
890 892 try:
891 893 del obj.__dict__[self.name]
892 894 except KeyError:
893 895 raise AttributeError(self.name)
894 896
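A minimal sketch of how a repository object typically wires this decorator up (the vfs join and the readbookmarks helper are hypothetical):

    class repofilecache(filecache):
        """filecache for files under .hg/"""
        def join(self, obj, fname):
            return obj.vfs.join(fname)          # hypothetical vfs attribute

    class examplerepo(object):
        def __init__(self, vfs):
            self.vfs = vfs
            self._filecache = {}                # required by the descriptor

        @repofilecache('bookmarks')
        def _bookmarks(self):
            # hypothetical loader; only re-run when .hg/bookmarks changes
            return readbookmarks(self)
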
895 897 def _locksub(repo, lock, envvar, cmd, environ=None, *args, **kwargs):
896 898 if lock is None:
897 899 raise error.LockInheritanceContractViolation(
898 900 'lock can only be inherited while held')
899 901 if environ is None:
900 902 environ = {}
901 903 with lock.inherit() as locker:
902 904 environ[envvar] = locker
903 905 return repo.ui.system(cmd, environ=environ, *args, **kwargs)
904 906
905 907 def wlocksub(repo, cmd, *args, **kwargs):
906 908 """run cmd as a subprocess that allows inheriting repo's wlock
907 909
908 910 This can only be called while the wlock is held. This takes all the
909 911 arguments that ui.system does, and returns the exit code of the
910 912 subprocess."""
911 913 return _locksub(repo, repo.currentwlock(), 'HG_WLOCK_LOCKER', cmd, *args,
912 914 **kwargs)
913 915
914 916 def gdinitconfig(ui):
915 917 """helper function to know if a repo should be created as general delta
916 918 """
917 919 # experimental config: format.generaldelta
918 920 return (ui.configbool('format', 'generaldelta', False)
919 921 or ui.configbool('format', 'usegeneraldelta', True))
920 922
921 923 def gddeltaconfig(ui):
922 924 """helper function to know if incoming delta should be optimised
923 925 """
924 926 # experimental config: format.generaldelta
925 927 return ui.configbool('format', 'generaldelta', False)
926 928
927 929 class simplekeyvaluefile(object):
928 930 """A simple file with key=value lines
929 931
930 932 Keys must be alphanumerics and start with a letter, values must not
931 933 contain '\n' characters"""
932 934 firstlinekey = '__firstline'
933 935
934 936 def __init__(self, vfs, path, keys=None):
935 937 self.vfs = vfs
936 938 self.path = path
937 939
938 940 def read(self, firstlinenonkeyval=False):
939 941 """Read the contents of a simple key-value file
940 942
941 943         'firstlinenonkeyval' indicates whether the first line of the file should
942 944         be treated as a key-value pair or returned fully under the
943 945 __firstline key."""
944 946 lines = self.vfs.readlines(self.path)
945 947 d = {}
946 948 if firstlinenonkeyval:
947 949 if not lines:
948 950 e = _("empty simplekeyvalue file")
949 951 raise error.CorruptedState(e)
950 952 # we don't want to include '\n' in the __firstline
951 953 d[self.firstlinekey] = lines[0][:-1]
952 954 del lines[0]
953 955
954 956 try:
955 957 # the 'if line.strip()' part prevents us from failing on empty
956 958             # lines which only contain '\n' and therefore are not skipped
957 959 # by 'if line'
958 960 updatedict = dict(line[:-1].split('=', 1) for line in lines
959 961 if line.strip())
960 962 if self.firstlinekey in updatedict:
961 963 e = _("%r can't be used as a key")
962 964 raise error.CorruptedState(e % self.firstlinekey)
963 965 d.update(updatedict)
964 966 except ValueError as e:
965 967 raise error.CorruptedState(str(e))
966 968 return d
967 969
968 970 def write(self, data, firstline=None):
969 971 """Write key=>value mapping to a file
970 972 data is a dict. Keys must be alphanumerical and start with a letter.
971 973 Values must not contain newline characters.
972 974
973 975 If 'firstline' is not None, it is written to file before
974 976 everything else, as it is, not in a key=value form"""
975 977 lines = []
976 978 if firstline is not None:
977 979 lines.append('%s\n' % firstline)
978 980
979 981 for k, v in data.items():
980 982 if k == self.firstlinekey:
981 983 e = "key name '%s' is reserved" % self.firstlinekey
982 984 raise error.ProgrammingError(e)
983 985 if not k[0].isalpha():
984 986 e = "keys must start with a letter in a key-value file"
985 987 raise error.ProgrammingError(e)
986 988 if not k.isalnum():
987 989 e = "invalid key name in a simple key-value file"
988 990 raise error.ProgrammingError(e)
989 991 if '\n' in v:
990 992 e = "invalid value in a simple key-value file"
991 993 raise error.ProgrammingError(e)
992 994 lines.append("%s=%s\n" % (k, v))
993 995 with self.vfs(self.path, mode='wb', atomictemp=True) as fp:
994 996 fp.write(''.join(lines))
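To make the key=value format above concrete, here is a hedged round-trip sketch. It assumes simplekeyvaluefile lives in mercurial.scmutil and that a plain mercurial.vfs.vfs rooted in a scratch directory is an acceptable backing store; both are assumptions, not shown in this diff.

import tempfile

from mercurial import scmutil
from mercurial import vfs as vfsmod

# Write a free-form first line plus a few key=value pairs, then read them
# back with the first line exposed under the reserved '__firstline' key.
root = tempfile.mkdtemp()
store = vfsmod.vfs(root)
kv = scmutil.simplekeyvaluefile(store, 'state')

kv.write({'version': '1', 'step': 'apply'}, firstline='demo-state-v1')
data = kv.read(firstlinenonkeyval=True)
# data == {'__firstline': 'demo-state-v1', 'version': '1', 'step': 'apply'}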
@@ -1,149 +1,155
1 1 $ cat << EOF >> $HGRCPATH
2 2 > [format]
3 3 > usegeneraldelta=yes
4 4 > EOF
5 5
6 6 $ hg init debugrevlog
7 7 $ cd debugrevlog
8 8 $ echo a > a
9 9 $ hg ci -Am adda
10 10 adding a
11 11 $ hg debugrevlog -m
12 12 format : 1
13 13 flags : inline, generaldelta
14 14
15 15 revisions : 1
16 16 merges : 0 ( 0.00%)
17 17 normal : 1 (100.00%)
18 18 revisions : 1
19 19 full : 1 (100.00%)
20 20 deltas : 0 ( 0.00%)
21 21 revision size : 44
22 22 full : 44 (100.00%)
23 23 deltas : 0 ( 0.00%)
24 24
25 25 chunks : 1
26 26 0x75 (u) : 1 (100.00%)
27 27 chunks size : 44
28 28 0x75 (u) : 44 (100.00%)
29 29
30 30 avg chain length : 0
31 31 max chain length : 0
32 32 compression ratio : 0
33 33
34 34 uncompressed data size (min/max/avg) : 43 / 43 / 43
35 35 full revision size (min/max/avg) : 44 / 44 / 44
36 36 delta size (min/max/avg) : 0 / 0 / 0
37 37
38 38 Test debugindex, with and without the --debug flag
39 39 $ hg debugindex a
40 40 rev offset length ..... linkrev nodeid p1 p2 (re)
41 41 0 0 3 .... 0 b789fdd96dc2 000000000000 000000000000 (re)
42 42 $ hg --debug debugindex a
43 43 rev offset length ..... linkrev nodeid p1 p2 (re)
44 44 0 0 3 .... 0 b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 (re)
45 45 $ hg debugindex -f 1 a
46 46 rev flag offset length size ..... link p1 p2 nodeid (re)
47 47 0 0000 0 3 2 .... 0 -1 -1 b789fdd96dc2 (re)
48 48 $ hg --debug debugindex -f 1 a
49 49 rev flag offset length size ..... link p1 p2 nodeid (re)
50 50 0 0000 0 3 2 .... 0 -1 -1 b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 (re)
51 51
52 52 debugdelta chain basic output
53 53
54 54 $ hg debugdeltachain -m
55 55 rev chain# chainlen prev delta size rawsize chainsize ratio lindist extradist extraratio
56 56 0 1 1 -1 base 44 43 44 1.02326 44 0 0.00000
57 57
58 58 $ hg debugdeltachain -m -T '{rev} {chainid} {chainlen}\n'
59 59 0 1 1
60 60
61 61 $ hg debugdeltachain -m -Tjson
62 62 [
63 63 {
64 64 "chainid": 1,
65 65 "chainlen": 1,
66 66 "chainratio": 1.02325581395,
67 67 "chainsize": 44,
68 68 "compsize": 44,
69 69 "deltatype": "base",
70 70 "extradist": 0,
71 71 "extraratio": 0.0,
72 72 "lindist": 44,
73 73 "prevrev": -1,
74 74 "rev": 0,
75 75 "uncompsize": 43
76 76 }
77 77 ]
78 78
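For reference, the chainratio above is consistent with chainsize divided by uncompsize (44 / 43 ≈ 1.02326), matching the ratio column of the debugdeltachain table earlier in this test.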
79 79 Test max chain len
80 80 $ cat >> $HGRCPATH << EOF
81 81 > [format]
82 82 > maxchainlen=4
83 83 > EOF
84 84
85 85 $ printf "This test checks if maxchainlen config value is respected also it can serve as basic test for debugrevlog -d <file>.\n" >> a
86 86 $ hg ci -m a
87 87 $ printf "b\n" >> a
88 88 $ hg ci -m a
89 89 $ printf "c\n" >> a
90 90 $ hg ci -m a
91 91 $ printf "d\n" >> a
92 92 $ hg ci -m a
93 93 $ printf "e\n" >> a
94 94 $ hg ci -m a
95 95 $ printf "f\n" >> a
96 96 $ hg ci -m a
97 97 $ printf 'g\n' >> a
98 98 $ hg ci -m a
99 99 $ printf 'h\n' >> a
100 100 $ hg ci -m a
101 101 $ hg debugrevlog -d a
102 102 # rev p1rev p2rev start end deltastart base p1 p2 rawsize totalsize compression heads chainlen
103 103 0 -1 -1 0 ??? 0 0 0 0 ??? ???? ? 1 0 (glob)
104 104 1 0 -1 ??? ??? 0 0 0 0 ??? ???? ? 1 1 (glob)
105 105 2 1 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 2 (glob)
106 106 3 2 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 3 (glob)
107 107 4 3 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 4 (glob)
108 108 5 4 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 0 (glob)
109 109 6 5 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 1 (glob)
110 110 7 6 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 2 (glob)
111 111 8 7 -1 ??? ??? ??? ??? ??? 0 ??? ???? ? 1 3 (glob)
112 112
113 Test WdirUnsupported exception
114
115 $ hg debugdata -c ffffffffffffffffffffffffffffffffffffffff
116 abort: working directory revision cannot be specified
117 [255]
118
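The abort above comes from the revlog refusing to resolve the all-'f' working-directory node. Below is a hedged Python-level sketch of the same behaviour; it assumes repo is an open localrepository and that error.WdirUnsupported is the exception class surfaced here (the test only shows the user-facing abort message).

from mercurial import error
from mercurial.node import wdirid

def has_stored_wdir_rev(repo):
    try:
        repo.changelog.rev(wdirid)
    except error.WdirUnsupported:
        return False   # the all-'f' node has no stored revision to expose
    return True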
113 119 Test cache warming command
114 120
115 121 $ rm -rf .hg/cache/
116 122 $ hg debugupdatecaches --debug
117 123 updating the branch cache
118 124 $ ls -r .hg/cache/*
119 125 .hg/cache/rbc-revs-v1
120 126 .hg/cache/rbc-names-v1
121 127 .hg/cache/branch2-served
122 128
123 129 $ cd ..
124 130
125 131 Test internal debugstacktrace command
126 132
127 133 $ cat > debugstacktrace.py << EOF
128 134 > from mercurial.util import debugstacktrace, dst, sys
129 135 > def f():
130 136 > debugstacktrace(f=sys.stdout)
131 137 > g()
132 138 > def g():
133 139 > dst('hello from g\\n', skip=1)
134 140 > h()
135 141 > def h():
136 142 > dst('hi ...\\nfrom h hidden in g', 1, depth=2)
137 143 > f()
138 144 > EOF
139 145 $ python debugstacktrace.py
140 146 stacktrace at:
141 147 debugstacktrace.py:10 in * (glob)
142 148 debugstacktrace.py:3 in f
143 149 hello from g at:
144 150 debugstacktrace.py:10 in * (glob)
145 151 debugstacktrace.py:4 in f
146 152 hi ...
147 153 from h hidden in g at:
148 154 debugstacktrace.py:4 in f
149 155 debugstacktrace.py:7 in g