revlog: move censor logic into main revlog class...
Gregory Szorc
r37461:65250a66 default
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -1,124 +1,85 @@
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 import struct
11
12 10 from .thirdparty.zope import (
13 11 interface as zi,
14 12 )
15 13 from . import (
16 error,
17 mdiff,
18 14 repository,
19 15 revlog,
20 16 )
21 17
22 def _censoredtext(text):
23 m, offs = revlog.parsemeta(text)
24 return m and "censored" in m
25
26 18 @zi.implementer(repository.ifilestorage)
27 19 class filelog(revlog.revlog):
28 20 def __init__(self, opener, path):
29 21 super(filelog, self).__init__(opener,
30 "/".join(("data", path + ".i")))
22 "/".join(("data", path + ".i")),
23 censorable=True)
31 24 # full name of the user visible file, relative to the repository root
32 25 self.filename = path
33 26
34 27 def read(self, node):
35 28 t = self.revision(node)
36 29 if not t.startswith('\1\n'):
37 30 return t
38 31 s = t.index('\1\n', 2)
39 32 return t[s + 2:]
40 33
41 34 def add(self, text, meta, transaction, link, p1=None, p2=None):
42 35 if meta or text.startswith('\1\n'):
43 36 text = revlog.packmeta(meta, text)
44 37 return self.addrevision(text, transaction, link, p1, p2)
45 38
46 39 def renamed(self, node):
47 40 if self.parents(node)[0] != revlog.nullid:
48 41 return False
49 42 t = self.revision(node)
50 43 m = revlog.parsemeta(t)[0]
51 44 if m and "copy" in m:
52 45 return (m["copy"], revlog.bin(m["copyrev"]))
53 46 return False
54 47
55 48 def size(self, rev):
56 49 """return the size of a given revision"""
57 50
58 51 # for revisions with renames, we have to go the slow way
59 52 node = self.node(rev)
60 53 if self.renamed(node):
61 54 return len(self.read(node))
62 55 if self.iscensored(rev):
63 56 return 0
64 57
65 58 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
66 59 return super(filelog, self).size(rev)
67 60
68 61 def cmp(self, node, text):
69 62 """compare text with a given file revision
70 63
71 64 returns True if text is different than what is stored.
72 65 """
73 66
74 67 t = text
75 68 if text.startswith('\1\n'):
76 69 t = '\1\n\1\n' + text
77 70
78 71 samehashes = not super(filelog, self).cmp(node, t)
79 72 if samehashes:
80 73 return False
81 74
82 75 # censored files compare against the empty file
83 76 if self.iscensored(self.rev(node)):
84 77 return text != ''
85 78
86 79 # renaming a file produces a different hash, even if the data
87 80 # remains unchanged. Check if that's the case (slow):
88 81 if self.renamed(node):
89 82 t2 = self.read(node)
90 83 return t2 != text
91 84
92 85 return True
93
94 def checkhash(self, text, node, p1=None, p2=None, rev=None):
95 try:
96 super(filelog, self).checkhash(text, node, p1=p1, p2=p2, rev=rev)
97 except error.RevlogError:
98 if _censoredtext(text):
99 raise error.CensoredNodeError(self.indexfile, node, text)
100 raise
101
102 def iscensored(self, rev):
103 """Check if a file revision is censored."""
104 return self.flags(rev) & revlog.REVIDX_ISCENSORED
105
106 def _peek_iscensored(self, baserev, delta, flush):
107 """Quickly check if a delta produces a censored revision."""
108 # Fragile heuristic: unless new file meta keys are added alphabetically
109 # preceding "censored", all censored revisions are prefixed by
110 # "\1\ncensored:". A delta producing such a censored revision must be a
111 # full-replacement delta, so we inspect the first and only patch in the
112 # delta for this prefix.
113 hlen = struct.calcsize(">lll")
114 if len(delta) <= hlen:
115 return False
116
117 oldlen = self.rawsize(baserev)
118 newlen = len(delta) - hlen
119 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
120 return False
121
122 add = "\1\ncensored:"
123 addlen = len(add)
124 return newlen >= addlen and delta[hlen:hlen + addlen] == add
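
For readers following the heuristic above, here is a minimal standalone sketch (illustrative, not part of the patch) of what `_peek_iscensored` checks, assuming `mdiff.replacediffheader(oldlen, newlen)` packs `(0, oldlen, newlen)` as `>lll`:

```python
import struct

def _looks_censored(delta, oldlen):
    # A bdiff delta is a series of 12-byte (start, end, newlength) ">lll"
    # headers, each followed by newlength bytes of replacement data. A
    # delta replacing the whole base in one patch starts (0, oldlen, newlen).
    hlen = struct.calcsize(">lll")
    if len(delta) <= hlen:
        return False
    newlen = len(delta) - hlen
    if delta[:hlen] != struct.pack(">lll", 0, oldlen, newlen):
        return False
    prefix = "\1\ncensored:"
    return delta[hlen:hlen + len(prefix)] == prefix

# A censored rawtext begins with the "\1\ncensored: ..." meta envelope,
# so a full-replacement delta exposes that prefix right after its header.
body = "\1\ncensored: tombstone\1\n"
assert _looks_censored(struct.pack(">lll", 0, 42, len(body)) + body, 42)
```
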
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,2554 +1,2588 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import heapq
21 21 import os
22 22 import re
23 23 import struct
24 24 import zlib
25 25
26 26 # import stuff from node for others to import from revlog
27 27 from .node import (
28 28 bin,
29 29 hex,
30 30 nullid,
31 31 nullrev,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .thirdparty import (
38 38 attr,
39 39 )
40 40 from . import (
41 41 ancestor,
42 42 error,
43 43 mdiff,
44 44 policy,
45 45 pycompat,
46 46 templatefilters,
47 47 util,
48 48 )
49 49 from .utils import (
50 50 stringutil,
51 51 )
52 52
53 53 parsers = policy.importmod(r'parsers')
54 54
55 55 # Aliased for performance.
56 56 _zlibdecompress = zlib.decompress
57 57
58 58 # revlog header flags
59 59 REVLOGV0 = 0
60 60 REVLOGV1 = 1
61 61 # Dummy value until file format is finalized.
62 62 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 63 REVLOGV2 = 0xDEAD
64 64 FLAG_INLINE_DATA = (1 << 16)
65 65 FLAG_GENERALDELTA = (1 << 17)
66 66 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 67 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 68 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 69 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 70 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 71
72 72 # revlog index flags
73 73 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 74 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 75 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 76 REVIDX_DEFAULT_FLAGS = 0
77 77 # stable order in which flags need to be processed and their processors applied
78 78 REVIDX_FLAGS_ORDER = [
79 79 REVIDX_ISCENSORED,
80 80 REVIDX_ELLIPSIS,
81 81 REVIDX_EXTSTORED,
82 82 ]
83 83 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 84 # bitmask for flags that could cause rawdata content change
85 85 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 86
87 87 # max size of revlog with inline data
88 88 _maxinline = 131072
89 89 _chunksize = 1048576
90 90
91 91 RevlogError = error.RevlogError
92 92 LookupError = error.LookupError
93 93 CensoredNodeError = error.CensoredNodeError
94 94 ProgrammingError = error.ProgrammingError
95 95
96 96 # Store flag processors (cf. 'addflagprocessor()' to register)
97 97 _flagprocessors = {
98 98 REVIDX_ISCENSORED: None,
99 99 }
100 100
101 101 _mdre = re.compile('\1\n')
102 102 def parsemeta(text):
103 103 """return (metadatadict, metadatasize)"""
104 104 # text can be buffer, so we can't use .startswith or .index
105 105 if text[:2] != '\1\n':
106 106 return None, None
107 107 s = _mdre.search(text, 2).start()
108 108 mtext = text[2:s]
109 109 meta = {}
110 110 for l in mtext.splitlines():
111 111 k, v = l.split(": ", 1)
112 112 meta[k] = v
113 113 return meta, (s + 2)
114 114
115 115 def packmeta(meta, text):
116 116 keys = sorted(meta)
117 117 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
118 118 return "\1\n%s\1\n%s" % (metatext, text)
119 119
120 def _censoredtext(text):
121 m, offs = parsemeta(text)
122 return m and "censored" in m
123
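
An illustrative round-trip (not part of the patch) of the metadata envelope these helpers handle: `packmeta` frames sorted key/value pairs between two `\1\n` markers, and `parsemeta` recovers the dict plus the envelope size:

```python
text = packmeta({"copy": "a.txt", "copyrev": "0" * 40}, "file body\n")
meta, offset = parsemeta(text)
assert meta == {"copy": "a.txt", "copyrev": "0" * 40}
assert text[offset:] == "file body\n"
# The helper added above just checks for the "censored" key:
assert _censoredtext(packmeta({"censored": "tombstone"}, ""))
```
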
120 124 def addflagprocessor(flag, processor):
121 125 """Register a flag processor on a revision data flag.
122 126
123 127 Invariant:
124 128 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
125 129 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
126 130 - Only one flag processor can be registered on a specific flag.
127 131 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
128 132 following signatures:
129 133 - (read) f(self, rawtext) -> text, bool
130 134 - (write) f(self, text) -> rawtext, bool
131 135 - (raw) f(self, rawtext) -> bool
132 136 "text" is presented to the user. "rawtext" is stored in revlog data, not
133 137 directly visible to the user.
134 138 The boolean returned by these transforms is used to determine whether
135 139 the returned text can be used for hash integrity checking. For example,
136 140 if "write" returns False, then "text" is used to generate hash. If
137 141 "write" returns True, that basically means "rawtext" returned by "write"
138 142 should be used to generate hash. Usually, "write" and "read" return
139 143 different booleans. And "raw" returns the same boolean as "write".
140 144
141 145 Note: The 'raw' transform is used for changegroup generation and in some
142 146 debug commands. In this case the transform only indicates whether the
143 147 contents can be used for hash integrity checks.
144 148 """
145 149 if not flag & REVIDX_KNOWN_FLAGS:
146 150 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
147 151 raise ProgrammingError(msg)
148 152 if flag not in REVIDX_FLAGS_ORDER:
149 153 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
150 154 raise ProgrammingError(msg)
151 155 if flag in _flagprocessors:
152 156 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
153 157 raise error.Abort(msg)
154 158 _flagprocessors[flag] = processor
155 159
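
A toy registration (purely illustrative; the transforms here are hypothetical) showing the `(read, write, raw)` contract the docstring describes, reusing `REVIDX_EXTSTORED` as the example flag:

```python
def _readext(self, rawtext):
    # stored rawtext -> user-visible text; False: hash the returned text
    return rawtext[len("stub:"):], False

def _writeext(self, text):
    # user text -> stored rawtext; False: hash the input text, not rawtext
    return "stub:" + text, False

def _rawext(self, rawtext):
    # rawtext alone cannot be used for hash verification here
    return False

addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
```
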
156 160 def getoffset(q):
157 161 return int(q >> 16)
158 162
159 163 def gettype(q):
160 164 return int(q & 0xFFFF)
161 165
162 166 def offset_type(offset, type):
163 167 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
164 168 raise ValueError('unknown revlog index flags')
165 169 return int(int(offset) << 16 | type)
166 170
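
Worked example (illustrative): the first field of an index entry packs the 48-bit data offset and the 16-bit flags into a single integer:

```python
packed = offset_type(1024, REVIDX_ISCENSORED)
assert packed == (1024 << 16) | 0x8000
assert getoffset(packed) == 1024
assert gettype(packed) == REVIDX_ISCENSORED
```
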
167 171 _nullhash = hashlib.sha1(nullid)
168 172
169 173 def hash(text, p1, p2):
170 174 """generate a hash from the given text and its parent hashes
171 175
172 176 This hash combines both the current file contents and its history
173 177 in a manner that makes it easy to distinguish nodes with the same
174 178 content in the revision graph.
175 179 """
176 180 # As of now, if one of the parent nodes is null, p2 is null
177 181 if p2 == nullid:
178 182 # deep copy of a hash is faster than creating one
179 183 s = _nullhash.copy()
180 184 s.update(p1)
181 185 else:
182 186 # none of the parent nodes are nullid
183 187 if p1 < p2:
184 188 a = p1
185 189 b = p2
186 190 else:
187 191 a = p2
188 192 b = p1
189 193 s = hashlib.sha1(a)
190 194 s.update(b)
191 195 s.update(text)
192 196 return s.digest()
193 197
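
Equivalently (illustration only), the node hash is SHA-1 over the two parents in sorted order followed by the text:

```python
import hashlib

p1, p2 = '\x11' * 20, '\x22' * 20
expected = hashlib.sha1(min(p1, p2) + max(p1, p2) + 'data').digest()
assert hash('data', p1, p2) == expected
# With p2 == nullid the fast path gives the same answer, since nullid
# (twenty NUL bytes) always sorts first.
```
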
194 198 def _trimchunk(revlog, revs, startidx, endidx=None):
195 199 """returns revs[startidx:endidx] without empty trailing revs
196 200 """
197 201 length = revlog.length
198 202
199 203 if endidx is None:
200 204 endidx = len(revs)
201 205
202 206 # Trim empty revs at the end, but never the very first revision of a chain
203 207 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
204 208 endidx -= 1
205 209
206 210 return revs[startidx:endidx]
207 211
208 212 def _slicechunk(revlog, revs):
209 213 """slice revs to reduce the amount of unrelated data to be read from disk.
210 214
211 215 ``revs`` is sliced into groups that should each be read in one go.
212 216 Assume that revs are sorted.
213 217 """
214 218 start = revlog.start
215 219 length = revlog.length
216 220
217 221 if len(revs) <= 1:
218 222 yield revs
219 223 return
220 224
221 225 startbyte = start(revs[0])
222 226 endbyte = start(revs[-1]) + length(revs[-1])
223 227 readdata = deltachainspan = endbyte - startbyte
224 228
225 229 chainpayload = sum(length(r) for r in revs)
226 230
227 231 if deltachainspan:
228 232 density = chainpayload / float(deltachainspan)
229 233 else:
230 234 density = 1.0
231 235
232 236 # Store the gaps in a heap to have them sorted by decreasing size
233 237 gapsheap = []
234 238 heapq.heapify(gapsheap)
235 239 prevend = None
236 240 for i, rev in enumerate(revs):
237 241 revstart = start(rev)
238 242 revlen = length(rev)
239 243
240 244 # Skip empty revisions to form larger holes
241 245 if revlen == 0:
242 246 continue
243 247
244 248 if prevend is not None:
245 249 gapsize = revstart - prevend
246 250 # only consider holes that are large enough
247 251 if gapsize > revlog._srmingapsize:
248 252 heapq.heappush(gapsheap, (-gapsize, i))
249 253
250 254 prevend = revstart + revlen
251 255
252 256 # Collect the indices of the largest holes until the density is acceptable
253 257 indicesheap = []
254 258 heapq.heapify(indicesheap)
255 259 while gapsheap and density < revlog._srdensitythreshold:
256 260 oppgapsize, gapidx = heapq.heappop(gapsheap)
257 261
258 262 heapq.heappush(indicesheap, gapidx)
259 263
260 264 # the gap sizes are stored as negatives to be sorted decreasingly
261 265 # by the heap
262 266 readdata -= (-oppgapsize)
263 267 if readdata > 0:
264 268 density = chainpayload / float(readdata)
265 269 else:
266 270 density = 1.0
267 271
268 272 # Cut the revs at collected indices
269 273 previdx = 0
270 274 while indicesheap:
271 275 idx = heapq.heappop(indicesheap)
272 276
273 277 chunk = _trimchunk(revlog, revs, previdx, idx)
274 278 if chunk:
275 279 yield chunk
276 280
277 281 previdx = idx
278 282
279 283 chunk = _trimchunk(revlog, revs, previdx)
280 284 if chunk:
281 285 yield chunk
282 286
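
A small worked example (with a hypothetical stand-in revlog) of the slicing above: four 10-byte revisions separated by a 980-byte gap fall below a 0.5 density threshold, so the read is split at the gap:

```python
class _fakerevlog(object):
    _srdensitythreshold = 0.5
    _srmingapsize = 100

    def start(self, rev):
        return [0, 10, 1000, 1010][rev]

    def length(self, rev):
        return 10

assert list(_slicechunk(_fakerevlog(), [0, 1, 2, 3])) == [[0, 1], [2, 3]]
```
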
283 287 @attr.s(slots=True, frozen=True)
284 288 class _deltainfo(object):
285 289 distance = attr.ib()
286 290 deltalen = attr.ib()
287 291 data = attr.ib()
288 292 base = attr.ib()
289 293 chainbase = attr.ib()
290 294 chainlen = attr.ib()
291 295 compresseddeltalen = attr.ib()
292 296
293 297 class _deltacomputer(object):
294 298 def __init__(self, revlog):
295 299 self.revlog = revlog
296 300
297 301 def _getcandidaterevs(self, p1, p2, cachedelta):
298 302 """
299 303 Yields candidate base revisions worth diffing against,
300 304 grouped by how cheap they are to try.
301 305 """
302 306 revlog = self.revlog
303 307 curr = len(revlog)
304 308 prev = curr - 1
305 309 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
306 310
307 311 # should we try to build a delta?
308 312 if prev != nullrev and revlog.storedeltachains:
309 313 tested = set()
310 314 # This condition is true most of the time when processing
311 315 # changegroup data into a generaldelta repo. The only time it
312 316 # isn't true is if this is the first revision in a delta chain
313 317 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
314 318 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
315 319 # Assume what we received from the server is a good choice
316 320 # build delta will reuse the cache
317 321 yield (cachedelta[0],)
318 322 tested.add(cachedelta[0])
319 323
320 324 if revlog._generaldelta:
321 325 # exclude already lazy tested base if any
322 326 parents = [p for p in (p1r, p2r)
323 327 if p != nullrev and p not in tested]
324 328 if parents and not revlog._aggressivemergedeltas:
325 329 # Pick whichever parent is closer to us (to minimize the
326 330 # chance of having to build a fulltext).
327 331 parents = [max(parents)]
328 332 tested.update(parents)
329 333 yield parents
330 334
331 335 if prev not in tested:
332 336 # other approaches failed; try against prev to hopefully save us a
333 337 # fulltext.
334 338 yield (prev,)
335 339
336 340 def buildtext(self, revinfo, fh):
337 341 """Builds a fulltext version of a revision
338 342
339 343 revinfo: _revisioninfo instance that contains all needed info
340 344 fh: file handle to either the .i or the .d revlog file,
341 345 depending on whether it is inlined or not
342 346 """
343 347 btext = revinfo.btext
344 348 if btext[0] is not None:
345 349 return btext[0]
346 350
347 351 revlog = self.revlog
348 352 cachedelta = revinfo.cachedelta
349 353 flags = revinfo.flags
350 354 node = revinfo.node
351 355
352 356 baserev = cachedelta[0]
353 357 delta = cachedelta[1]
354 358 # special case deltas which replace entire base; no need to decode
355 359 # base revision. this neatly avoids censored bases, which throw when
356 360 # they're decoded.
357 361 hlen = struct.calcsize(">lll")
358 362 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
359 363 len(delta) - hlen):
360 364 btext[0] = delta[hlen:]
361 365 else:
362 366 # deltabase is rawtext before changed by flag processors, which is
363 367 # equivalent to non-raw text
364 368 basetext = revlog.revision(baserev, _df=fh, raw=False)
365 369 btext[0] = mdiff.patch(basetext, delta)
366 370
367 371 try:
368 372 res = revlog._processflags(btext[0], flags, 'read', raw=True)
369 373 btext[0], validatehash = res
370 374 if validatehash:
371 375 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
372 376 if flags & REVIDX_ISCENSORED:
373 377 raise RevlogError(_('node %s is not censored') % node)
374 378 except CensoredNodeError:
375 379 # must pass the censored index flag to add censored revisions
376 380 if not flags & REVIDX_ISCENSORED:
377 381 raise
378 382 return btext[0]
379 383
380 384 def _builddeltadiff(self, base, revinfo, fh):
381 385 revlog = self.revlog
382 386 t = self.buildtext(revinfo, fh)
383 387 if revlog.iscensored(base):
384 388 # deltas based on a censored revision must replace the
385 389 # full content in one patch, so delta works everywhere
386 390 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
387 391 delta = header + t
388 392 else:
389 393 ptext = revlog.revision(base, _df=fh, raw=True)
390 394 delta = mdiff.textdiff(ptext, t)
391 395
392 396 return delta
393 397
394 398 def _builddeltainfo(self, revinfo, base, fh):
395 399 # can we use the cached delta?
396 400 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
397 401 delta = revinfo.cachedelta[1]
398 402 else:
399 403 delta = self._builddeltadiff(base, revinfo, fh)
400 404 revlog = self.revlog
401 405 header, data = revlog.compress(delta)
402 406 deltalen = len(header) + len(data)
403 407 chainbase = revlog.chainbase(base)
404 408 offset = revlog.end(len(revlog) - 1)
405 409 dist = deltalen + offset - revlog.start(chainbase)
406 410 if revlog._generaldelta:
407 411 deltabase = base
408 412 else:
409 413 deltabase = chainbase
410 414 chainlen, compresseddeltalen = revlog._chaininfo(base)
411 415 chainlen += 1
412 416 compresseddeltalen += deltalen
413 417 return _deltainfo(dist, deltalen, (header, data), deltabase,
414 418 chainbase, chainlen, compresseddeltalen)
415 419
416 420 def finddeltainfo(self, revinfo, fh):
417 421 """Find an acceptable delta against a candidate revision
418 422
419 423 revinfo: information about the revision (instance of _revisioninfo)
420 424 fh: file handle to either the .i or the .d revlog file,
421 425 depending on whether it is inlined or not
422 426
423 427 Returns the first acceptable candidate revision, as ordered by
424 428 _getcandidaterevs
425 429 """
426 430 cachedelta = revinfo.cachedelta
427 431 p1 = revinfo.p1
428 432 p2 = revinfo.p2
429 433 revlog = self.revlog
430 434
431 435 deltainfo = None
432 436 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
433 437 nominateddeltas = []
434 438 for candidaterev in candidaterevs:
435 439 # no delta for rawtext-changing revs (see "candelta" for why)
436 440 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
437 441 continue
438 442 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
439 443 if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
440 444 nominateddeltas.append(candidatedelta)
441 445 if nominateddeltas:
442 446 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
443 447 break
444 448
445 449 return deltainfo
446 450
447 451 @attr.s(slots=True, frozen=True)
448 452 class _revisioninfo(object):
449 453 """Information about a revision that allows building its fulltext
450 454 node: expected hash of the revision
451 455 p1, p2: parent revs of the revision
452 456 btext: built text cache consisting of a one-element list
453 457 cachedelta: (baserev, uncompressed_delta) or None
454 458 flags: flags associated to the revision storage
455 459
456 460 One of btext[0] or cachedelta must be set.
457 461 """
458 462 node = attr.ib()
459 463 p1 = attr.ib()
460 464 p2 = attr.ib()
461 465 btext = attr.ib()
462 466 textlen = attr.ib()
463 467 cachedelta = attr.ib()
464 468 flags = attr.ib()
465 469
466 470 # index v0:
467 471 # 4 bytes: offset
468 472 # 4 bytes: compressed length
469 473 # 4 bytes: base rev
470 474 # 4 bytes: link rev
471 475 # 20 bytes: parent 1 nodeid
472 476 # 20 bytes: parent 2 nodeid
473 477 # 20 bytes: nodeid
474 478 indexformatv0 = struct.Struct(">4l20s20s20s")
475 479 indexformatv0_pack = indexformatv0.pack
476 480 indexformatv0_unpack = indexformatv0.unpack
477 481
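
Sanity check (illustrative): a v0 entry is a fixed 76 bytes, four 4-byte ints plus three 20-byte nodeids:

```python
entry = indexformatv0_pack(0, 11, 0, 0, nullid, nullid, '\xaa' * 20)
assert len(entry) == indexformatv0.size == 4 * 4 + 3 * 20
assert indexformatv0_unpack(entry)[6] == '\xaa' * 20
```
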
478 482 class revlogoldio(object):
479 483 def __init__(self):
480 484 self.size = indexformatv0.size
481 485
482 486 def parseindex(self, data, inline):
483 487 s = self.size
484 488 index = []
485 489 nodemap = {nullid: nullrev}
486 490 n = off = 0
487 491 l = len(data)
488 492 while off + s <= l:
489 493 cur = data[off:off + s]
490 494 off += s
491 495 e = indexformatv0_unpack(cur)
492 496 # transform to revlogv1 format
493 497 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
494 498 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
495 499 index.append(e2)
496 500 nodemap[e[6]] = n
497 501 n += 1
498 502
499 503 # add the magic null revision at -1
500 504 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
501 505
502 506 return index, nodemap, None
503 507
504 508 def packentry(self, entry, node, version, rev):
505 509 if gettype(entry[0]):
506 510 raise RevlogError(_('index entry flags need revlog version 1'))
507 511 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
508 512 node(entry[5]), node(entry[6]), entry[7])
509 513 return indexformatv0_pack(*e2)
510 514
511 515 # index ng:
512 516 # 6 bytes: offset
513 517 # 2 bytes: flags
514 518 # 4 bytes: compressed length
515 519 # 4 bytes: uncompressed length
516 520 # 4 bytes: base rev
517 521 # 4 bytes: link rev
518 522 # 4 bytes: parent 1 rev
519 523 # 4 bytes: parent 2 rev
520 524 # 32 bytes: nodeid
521 525 indexformatng = struct.Struct(">Qiiiiii20s12x")
522 526 indexformatng_pack = indexformatng.pack
523 527 versionformat = struct.Struct(">I")
524 528 versionformat_pack = versionformat.pack
525 529 versionformat_unpack = versionformat.unpack
526 530
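
Likewise for the v1 ("ng") format (illustrative): `>Qiiiiii20s12x` is 8 + 6×4 + 20 + 12 = 64 bytes per entry; note that for rev 0, `packentry` below overwrites the first four bytes with the version header:

```python
assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64
entry = indexformatng_pack(offset_type(0, 0), 11, 11, 0, 0, -1, -1, nullid)
assert len(entry) == 64
```
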
527 531 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
528 532 # signed integer)
529 533 _maxentrysize = 0x7fffffff
530 534
531 535 class revlogio(object):
532 536 def __init__(self):
533 537 self.size = indexformatng.size
534 538
535 539 def parseindex(self, data, inline):
536 540 # call the C implementation to parse the index data
537 541 index, cache = parsers.parse_index2(data, inline)
538 542 return index, getattr(index, 'nodemap', None), cache
539 543
540 544 def packentry(self, entry, node, version, rev):
541 545 p = indexformatng_pack(*entry)
542 546 if rev == 0:
543 547 p = versionformat_pack(version) + p[4:]
544 548 return p
545 549
546 550 class revlog(object):
547 551 """
548 552 the underlying revision storage object
549 553
550 554 A revlog consists of two parts, an index and the revision data.
551 555
552 556 The index is a file with a fixed record size containing
553 557 information on each revision, including its nodeid (hash), the
554 558 nodeids of its parents, the position and offset of its data within
555 559 the data file, and the revision it's based on. Finally, each entry
556 560 contains a linkrev entry that can serve as a pointer to external
557 561 data.
558 562
559 563 The revision data itself is a linear collection of data chunks.
560 564 Each chunk represents a revision and is usually represented as a
561 565 delta against the previous chunk. To bound lookup time, runs of
562 566 deltas are limited to about 2 times the length of the original
563 567 version data. This makes retrieval of a version proportional to
564 568 its size, or O(1) relative to the number of revisions.
565 569
566 570 Both pieces of the revlog are written to in an append-only
567 571 fashion, which means we never need to rewrite a file to insert or
568 572 remove data, and can use some simple techniques to avoid the need
569 573 for locking while reading.
570 574
571 575 If checkambig, indexfile is opened with checkambig=True at
572 576 writing, to avoid file stat ambiguity.
573 577
574 578 If mmaplargeindex is True, and an mmapindexthreshold is set, the
575 579 index will be mmapped rather than read if it is larger than the
576 580 configured threshold.
581
582 If censorable is True, the revlog can have censored revisions.
577 583 """
578 584 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
579 mmaplargeindex=False):
585 mmaplargeindex=False, censorable=False):
580 586 """
581 587 create a revlog object
582 588
583 589 opener is a function that abstracts the file opening operation
584 590 and can be used to implement COW semantics or the like.
585 591 """
586 592 self.indexfile = indexfile
587 593 self.datafile = datafile or (indexfile[:-2] + ".d")
588 594 self.opener = opener
589 595 # When True, indexfile is opened with checkambig=True at writing, to
590 596 # avoid file stat ambiguity.
591 597 self._checkambig = checkambig
598 self._censorable = censorable
592 599 # 3-tuple of (node, rev, text) for a raw revision.
593 600 self._cache = None
594 601 # Maps rev to chain base rev.
595 602 self._chainbasecache = util.lrucachedict(100)
596 603 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
597 604 self._chunkcache = (0, '')
598 605 # How much data to read and cache into the raw revlog data cache.
599 606 self._chunkcachesize = 65536
600 607 self._maxchainlen = None
601 608 self._aggressivemergedeltas = False
602 609 self.index = []
603 610 # Mapping of partial identifiers to full nodes.
604 611 self._pcache = {}
605 612 # Mapping of revision integer to full node.
606 613 self._nodecache = {nullid: nullrev}
607 614 self._nodepos = None
608 615 self._compengine = 'zlib'
609 616 self._maxdeltachainspan = -1
610 617 self._withsparseread = False
611 618 self._srdensitythreshold = 0.25
612 619 self._srmingapsize = 262144
613 620
614 621 mmapindexthreshold = None
615 622 v = REVLOG_DEFAULT_VERSION
616 623 opts = getattr(opener, 'options', None)
617 624 if opts is not None:
618 625 if 'revlogv2' in opts:
619 626 # version 2 revlogs always use generaldelta.
620 627 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
621 628 elif 'revlogv1' in opts:
622 629 if 'generaldelta' in opts:
623 630 v |= FLAG_GENERALDELTA
624 631 else:
625 632 v = 0
626 633 if 'chunkcachesize' in opts:
627 634 self._chunkcachesize = opts['chunkcachesize']
628 635 if 'maxchainlen' in opts:
629 636 self._maxchainlen = opts['maxchainlen']
630 637 if 'aggressivemergedeltas' in opts:
631 638 self._aggressivemergedeltas = opts['aggressivemergedeltas']
632 639 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
633 640 if 'compengine' in opts:
634 641 self._compengine = opts['compengine']
635 642 if 'maxdeltachainspan' in opts:
636 643 self._maxdeltachainspan = opts['maxdeltachainspan']
637 644 if mmaplargeindex and 'mmapindexthreshold' in opts:
638 645 mmapindexthreshold = opts['mmapindexthreshold']
639 646 self._withsparseread = bool(opts.get('with-sparse-read', False))
640 647 if 'sparse-read-density-threshold' in opts:
641 648 self._srdensitythreshold = opts['sparse-read-density-threshold']
642 649 if 'sparse-read-min-gap-size' in opts:
643 650 self._srmingapsize = opts['sparse-read-min-gap-size']
644 651
645 652 if self._chunkcachesize <= 0:
646 653 raise RevlogError(_('revlog chunk cache size %r is not greater '
647 654 'than 0') % self._chunkcachesize)
648 655 elif self._chunkcachesize & (self._chunkcachesize - 1):
649 656 raise RevlogError(_('revlog chunk cache size %r is not a power '
650 657 'of 2') % self._chunkcachesize)
651 658
652 659 indexdata = ''
653 660 self._initempty = True
654 661 try:
655 662 with self._indexfp() as f:
656 663 if (mmapindexthreshold is not None and
657 664 self.opener.fstat(f).st_size >= mmapindexthreshold):
658 665 indexdata = util.buffer(util.mmapread(f))
659 666 else:
660 667 indexdata = f.read()
661 668 if len(indexdata) > 0:
662 669 v = versionformat_unpack(indexdata[:4])[0]
663 670 self._initempty = False
664 671 except IOError as inst:
665 672 if inst.errno != errno.ENOENT:
666 673 raise
667 674
668 675 self.version = v
669 676 self._inline = v & FLAG_INLINE_DATA
670 677 self._generaldelta = v & FLAG_GENERALDELTA
671 678 flags = v & ~0xFFFF
672 679 fmt = v & 0xFFFF
673 680 if fmt == REVLOGV0:
674 681 if flags:
675 682 raise RevlogError(_('unknown flags (%#04x) in version %d '
676 683 'revlog %s') %
677 684 (flags >> 16, fmt, self.indexfile))
678 685 elif fmt == REVLOGV1:
679 686 if flags & ~REVLOGV1_FLAGS:
680 687 raise RevlogError(_('unknown flags (%#04x) in version %d '
681 688 'revlog %s') %
682 689 (flags >> 16, fmt, self.indexfile))
683 690 elif fmt == REVLOGV2:
684 691 if flags & ~REVLOGV2_FLAGS:
685 692 raise RevlogError(_('unknown flags (%#04x) in version %d '
686 693 'revlog %s') %
687 694 (flags >> 16, fmt, self.indexfile))
688 695 else:
689 696 raise RevlogError(_('unknown version (%d) in revlog %s') %
690 697 (fmt, self.indexfile))
691 698
692 699 self.storedeltachains = True
693 700
694 701 self._io = revlogio()
695 702 if self.version == REVLOGV0:
696 703 self._io = revlogoldio()
697 704 try:
698 705 d = self._io.parseindex(indexdata, self._inline)
699 706 except (ValueError, IndexError):
700 707 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
701 708 self.index, nodemap, self._chunkcache = d
702 709 if nodemap is not None:
703 710 self.nodemap = self._nodecache = nodemap
704 711 if not self._chunkcache:
705 712 self._chunkclear()
706 713 # revnum -> (chain-length, sum-delta-length)
707 714 self._chaininfocache = {}
708 715 # revlog header -> revlog compressor
709 716 self._decompressors = {}
710 717
711 718 @util.propertycache
712 719 def _compressor(self):
713 720 return util.compengines[self._compengine].revlogcompressor()
714 721
715 722 def _indexfp(self, mode='r'):
716 723 """file object for the revlog's index file"""
717 724 args = {r'mode': mode}
718 725 if mode != 'r':
719 726 args[r'checkambig'] = self._checkambig
720 727 if mode == 'w':
721 728 args[r'atomictemp'] = True
722 729 return self.opener(self.indexfile, **args)
723 730
724 731 def _datafp(self, mode='r'):
725 732 """file object for the revlog's data file"""
726 733 return self.opener(self.datafile, mode=mode)
727 734
728 735 @contextlib.contextmanager
729 736 def _datareadfp(self, existingfp=None):
730 737 """file object suitable to read data"""
731 738 if existingfp is not None:
732 739 yield existingfp
733 740 else:
734 741 if self._inline:
735 742 func = self._indexfp
736 743 else:
737 744 func = self._datafp
738 745 with func() as fp:
739 746 yield fp
740 747
741 748 def tip(self):
742 749 return self.node(len(self.index) - 2)
743 750 def __contains__(self, rev):
744 751 return 0 <= rev < len(self)
745 752 def __len__(self):
746 753 return len(self.index) - 1
747 754 def __iter__(self):
748 755 return iter(xrange(len(self)))
749 756 def revs(self, start=0, stop=None):
750 757 """iterate over all rev in this revlog (from start to stop)"""
751 758 step = 1
752 759 if stop is not None:
753 760 if start > stop:
754 761 step = -1
755 762 stop += step
756 763 else:
757 764 stop = len(self)
758 765 return xrange(start, stop, step)
759 766
760 767 @util.propertycache
761 768 def nodemap(self):
762 769 self.rev(self.node(0))
763 770 return self._nodecache
764 771
765 772 def hasnode(self, node):
766 773 try:
767 774 self.rev(node)
768 775 return True
769 776 except KeyError:
770 777 return False
771 778
772 779 def candelta(self, baserev, rev):
773 780 """whether two revisions (baserev, rev) can be delta-ed or not"""
774 781 # Disable delta if either rev requires a content-changing flag
775 782 # processor (ex. LFS). This is because such flag processor can alter
776 783 # the rawtext content that the delta will be based on, and two clients
777 784 # could have a same revlog node with different flags (i.e. different
778 785 # rawtext contents) and the delta could be incompatible.
779 786 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
780 787 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
781 788 return False
782 789 return True
783 790
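
A flag-level sketch (hypothetical helper, mirroring the method above): a rawtext-changing flag on either end of the delta rules the delta out:

```python
def _candelta(baseflags, revflags):
    return not ((baseflags | revflags) & REVIDX_RAWTEXT_CHANGING_FLAGS)

assert _candelta(0, 0)
assert not _candelta(REVIDX_EXTSTORED, 0)
assert not _candelta(0, REVIDX_ISCENSORED)
```
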
784 791 def clearcaches(self):
785 792 self._cache = None
786 793 self._chainbasecache.clear()
787 794 self._chunkcache = (0, '')
788 795 self._pcache = {}
789 796
790 797 try:
791 798 self._nodecache.clearcaches()
792 799 except AttributeError:
793 800 self._nodecache = {nullid: nullrev}
794 801 self._nodepos = None
795 802
796 803 def rev(self, node):
797 804 try:
798 805 return self._nodecache[node]
799 806 except TypeError:
800 807 raise
801 808 except RevlogError:
802 809 # parsers.c radix tree lookup failed
803 810 if node == wdirid:
804 811 raise error.WdirUnsupported
805 812 raise LookupError(node, self.indexfile, _('no node'))
806 813 except KeyError:
807 814 # pure python cache lookup failed
808 815 n = self._nodecache
809 816 i = self.index
810 817 p = self._nodepos
811 818 if p is None:
812 819 p = len(i) - 2
813 820 for r in xrange(p, -1, -1):
814 821 v = i[r][7]
815 822 n[v] = r
816 823 if v == node:
817 824 self._nodepos = r - 1
818 825 return r
819 826 if node == wdirid:
820 827 raise error.WdirUnsupported
821 828 raise LookupError(node, self.indexfile, _('no node'))
822 829
823 830 # Accessors for index entries.
824 831
825 832 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
826 833 # are flags.
827 834 def start(self, rev):
828 835 return int(self.index[rev][0] >> 16)
829 836
830 837 def flags(self, rev):
831 838 return self.index[rev][0] & 0xFFFF
832 839
833 840 def length(self, rev):
834 841 return self.index[rev][1]
835 842
836 843 def rawsize(self, rev):
837 844 """return the length of the uncompressed text for a given revision"""
838 845 l = self.index[rev][2]
839 846 if l >= 0:
840 847 return l
841 848
842 849 t = self.revision(rev, raw=True)
843 850 return len(t)
844 851
845 852 def size(self, rev):
846 853 """length of non-raw text (processed by a "read" flag processor)"""
847 854 # fast path: if no "read" flag processor could change the content,
848 855 # size is rawsize. note: ELLIPSIS is known to not change the content.
849 856 flags = self.flags(rev)
850 857 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
851 858 return self.rawsize(rev)
852 859
853 860 return len(self.revision(rev, raw=False))
854 861
855 862 def chainbase(self, rev):
856 863 base = self._chainbasecache.get(rev)
857 864 if base is not None:
858 865 return base
859 866
860 867 index = self.index
861 868 base = index[rev][3]
862 869 while base != rev:
863 870 rev = base
864 871 base = index[rev][3]
865 872
866 873 self._chainbasecache[rev] = base
867 874 return base
868 875
869 876 def linkrev(self, rev):
870 877 return self.index[rev][4]
871 878
872 879 def parentrevs(self, rev):
873 880 try:
874 881 entry = self.index[rev]
875 882 except IndexError:
876 883 if rev == wdirrev:
877 884 raise error.WdirUnsupported
878 885 raise
879 886
880 887 return entry[5], entry[6]
881 888
882 889 def node(self, rev):
883 890 try:
884 891 return self.index[rev][7]
885 892 except IndexError:
886 893 if rev == wdirrev:
887 894 raise error.WdirUnsupported
888 895 raise
889 896
890 897 # Derived from index values.
891 898
892 899 def end(self, rev):
893 900 return self.start(rev) + self.length(rev)
894 901
895 902 def parents(self, node):
896 903 i = self.index
897 904 d = i[self.rev(node)]
898 905 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
899 906
900 907 def chainlen(self, rev):
901 908 return self._chaininfo(rev)[0]
902 909
903 910 def _chaininfo(self, rev):
904 911 chaininfocache = self._chaininfocache
905 912 if rev in chaininfocache:
906 913 return chaininfocache[rev]
907 914 index = self.index
908 915 generaldelta = self._generaldelta
909 916 iterrev = rev
910 917 e = index[iterrev]
911 918 clen = 0
912 919 compresseddeltalen = 0
913 920 while iterrev != e[3]:
914 921 clen += 1
915 922 compresseddeltalen += e[1]
916 923 if generaldelta:
917 924 iterrev = e[3]
918 925 else:
919 926 iterrev -= 1
920 927 if iterrev in chaininfocache:
921 928 t = chaininfocache[iterrev]
922 929 clen += t[0]
923 930 compresseddeltalen += t[1]
924 931 break
925 932 e = index[iterrev]
926 933 else:
927 934 # Add text length of base since decompressing that also takes
928 935 # work. For cache hits the length is already included.
929 936 compresseddeltalen += e[1]
930 937 r = (clen, compresseddeltalen)
931 938 chaininfocache[rev] = r
932 939 return r
933 940
934 941 def _deltachain(self, rev, stoprev=None):
935 942 """Obtain the delta chain for a revision.
936 943
937 944 ``stoprev`` specifies a revision to stop at. If not specified, we
938 945 stop at the base of the chain.
939 946
940 947 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
941 948 revs in ascending order and ``stopped`` is a bool indicating whether
942 949 ``stoprev`` was hit.
943 950 """
944 951 # Try C implementation.
945 952 try:
946 953 return self.index.deltachain(rev, stoprev, self._generaldelta)
947 954 except AttributeError:
948 955 pass
949 956
950 957 chain = []
951 958
952 959 # Alias to prevent attribute lookup in tight loop.
953 960 index = self.index
954 961 generaldelta = self._generaldelta
955 962
956 963 iterrev = rev
957 964 e = index[iterrev]
958 965 while iterrev != e[3] and iterrev != stoprev:
959 966 chain.append(iterrev)
960 967 if generaldelta:
961 968 iterrev = e[3]
962 969 else:
963 970 iterrev -= 1
964 971 e = index[iterrev]
965 972
966 973 if iterrev == stoprev:
967 974 stopped = True
968 975 else:
969 976 chain.append(iterrev)
970 977 stopped = False
971 978
972 979 chain.reverse()
973 980 return chain, stopped
974 981
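
The pure-python fallback boils down to following base pointers until a revision is its own base (a full text). A standalone sketch with a hypothetical base table, assuming generaldelta:

```python
def _chainof(bases, rev):
    chain = []
    while bases[rev] != rev:
        chain.append(rev)
        rev = bases[rev]
    chain.append(rev)       # the full-text base ends the chain
    chain.reverse()
    return chain

#                 rev:  0  1  2  3  4  5
assert _chainof([0, 0, 1, 0, 3, 3], 5) == [0, 3, 5]
```
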
975 982 def ancestors(self, revs, stoprev=0, inclusive=False):
976 983 """Generate the ancestors of 'revs' in reverse topological order.
977 984 Does not generate revs lower than stoprev.
978 985
979 986 See the documentation for ancestor.lazyancestors for more details."""
980 987
981 988 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
982 989 inclusive=inclusive)
983 990
984 991 def descendants(self, revs):
985 992 """Generate the descendants of 'revs' in revision order.
986 993
987 994 Yield a sequence of revision numbers starting with a child of
988 995 some rev in revs, i.e., each revision is *not* considered a
989 996 descendant of itself. Results are ordered by revision number (a
990 997 topological sort)."""
991 998 first = min(revs)
992 999 if first == nullrev:
993 1000 for i in self:
994 1001 yield i
995 1002 return
996 1003
997 1004 seen = set(revs)
998 1005 for i in self.revs(start=first + 1):
999 1006 for x in self.parentrevs(i):
1000 1007 if x != nullrev and x in seen:
1001 1008 seen.add(i)
1002 1009 yield i
1003 1010 break
1004 1011
1005 1012 def findcommonmissing(self, common=None, heads=None):
1006 1013 """Return a tuple of the ancestors of common and the ancestors of heads
1007 1014 that are not ancestors of common. In revset terminology, we return the
1008 1015 tuple:
1009 1016
1010 1017 ::common, (::heads) - (::common)
1011 1018
1012 1019 The list is sorted by revision number, meaning it is
1013 1020 topologically sorted.
1014 1021
1015 1022 'heads' and 'common' are both lists of node IDs. If heads is
1016 1023 not supplied, uses all of the revlog's heads. If common is not
1017 1024 supplied, uses nullid."""
1018 1025 if common is None:
1019 1026 common = [nullid]
1020 1027 if heads is None:
1021 1028 heads = self.heads()
1022 1029
1023 1030 common = [self.rev(n) for n in common]
1024 1031 heads = [self.rev(n) for n in heads]
1025 1032
1026 1033 # we want the ancestors, but inclusive
1027 1034 class lazyset(object):
1028 1035 def __init__(self, lazyvalues):
1029 1036 self.addedvalues = set()
1030 1037 self.lazyvalues = lazyvalues
1031 1038
1032 1039 def __contains__(self, value):
1033 1040 return value in self.addedvalues or value in self.lazyvalues
1034 1041
1035 1042 def __iter__(self):
1036 1043 added = self.addedvalues
1037 1044 for r in added:
1038 1045 yield r
1039 1046 for r in self.lazyvalues:
1040 1047 if r not in added:
1041 1048 yield r
1042 1049
1043 1050 def add(self, value):
1044 1051 self.addedvalues.add(value)
1045 1052
1046 1053 def update(self, values):
1047 1054 self.addedvalues.update(values)
1048 1055
1049 1056 has = lazyset(self.ancestors(common))
1050 1057 has.add(nullrev)
1051 1058 has.update(common)
1052 1059
1053 1060 # take all ancestors from heads that aren't in has
1054 1061 missing = set()
1055 1062 visit = collections.deque(r for r in heads if r not in has)
1056 1063 while visit:
1057 1064 r = visit.popleft()
1058 1065 if r in missing:
1059 1066 continue
1060 1067 else:
1061 1068 missing.add(r)
1062 1069 for p in self.parentrevs(r):
1063 1070 if p not in has:
1064 1071 visit.append(p)
1065 1072 missing = list(missing)
1066 1073 missing.sort()
1067 1074 return has, [self.node(miss) for miss in missing]
1068 1075
1069 1076 def incrementalmissingrevs(self, common=None):
1070 1077 """Return an object that can be used to incrementally compute the
1071 1078 revision numbers of the ancestors of arbitrary sets that are not
1072 1079 ancestors of common. This is an ancestor.incrementalmissingancestors
1073 1080 object.
1074 1081
1075 1082 'common' is a list of revision numbers. If common is not supplied, uses
1076 1083 nullrev.
1077 1084 """
1078 1085 if common is None:
1079 1086 common = [nullrev]
1080 1087
1081 1088 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1082 1089
1083 1090 def findmissingrevs(self, common=None, heads=None):
1084 1091 """Return the revision numbers of the ancestors of heads that
1085 1092 are not ancestors of common.
1086 1093
1087 1094 More specifically, return a list of revision numbers corresponding to
1088 1095 nodes N such that every N satisfies the following constraints:
1089 1096
1090 1097 1. N is an ancestor of some node in 'heads'
1091 1098 2. N is not an ancestor of any node in 'common'
1092 1099
1093 1100 The list is sorted by revision number, meaning it is
1094 1101 topologically sorted.
1095 1102
1096 1103 'heads' and 'common' are both lists of revision numbers. If heads is
1097 1104 not supplied, uses all of the revlog's heads. If common is not
1098 1105 supplied, uses nullid."""
1099 1106 if common is None:
1100 1107 common = [nullrev]
1101 1108 if heads is None:
1102 1109 heads = self.headrevs()
1103 1110
1104 1111 inc = self.incrementalmissingrevs(common=common)
1105 1112 return inc.missingancestors(heads)
1106 1113
1107 1114 def findmissing(self, common=None, heads=None):
1108 1115 """Return the ancestors of heads that are not ancestors of common.
1109 1116
1110 1117 More specifically, return a list of nodes N such that every N
1111 1118 satisfies the following constraints:
1112 1119
1113 1120 1. N is an ancestor of some node in 'heads'
1114 1121 2. N is not an ancestor of any node in 'common'
1115 1122
1116 1123 The list is sorted by revision number, meaning it is
1117 1124 topologically sorted.
1118 1125
1119 1126 'heads' and 'common' are both lists of node IDs. If heads is
1120 1127 not supplied, uses all of the revlog's heads. If common is not
1121 1128 supplied, uses nullid."""
1122 1129 if common is None:
1123 1130 common = [nullid]
1124 1131 if heads is None:
1125 1132 heads = self.heads()
1126 1133
1127 1134 common = [self.rev(n) for n in common]
1128 1135 heads = [self.rev(n) for n in heads]
1129 1136
1130 1137 inc = self.incrementalmissingrevs(common=common)
1131 1138 return [self.node(r) for r in inc.missingancestors(heads)]
1132 1139
1133 1140 def nodesbetween(self, roots=None, heads=None):
1134 1141 """Return a topological path from 'roots' to 'heads'.
1135 1142
1136 1143 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1137 1144 topologically sorted list of all nodes N that satisfy both of
1138 1145 these constraints:
1139 1146
1140 1147 1. N is a descendant of some node in 'roots'
1141 1148 2. N is an ancestor of some node in 'heads'
1142 1149
1143 1150 Every node is considered to be both a descendant and an ancestor
1144 1151 of itself, so every reachable node in 'roots' and 'heads' will be
1145 1152 included in 'nodes'.
1146 1153
1147 1154 'outroots' is the list of reachable nodes in 'roots', i.e., the
1148 1155 subset of 'roots' that is returned in 'nodes'. Likewise,
1149 1156 'outheads' is the subset of 'heads' that is also in 'nodes'.
1150 1157
1151 1158 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1152 1159 unspecified, uses nullid as the only root. If 'heads' is
1153 1160 unspecified, uses list of all of the revlog's heads."""
1154 1161 nonodes = ([], [], [])
1155 1162 if roots is not None:
1156 1163 roots = list(roots)
1157 1164 if not roots:
1158 1165 return nonodes
1159 1166 lowestrev = min([self.rev(n) for n in roots])
1160 1167 else:
1161 1168 roots = [nullid] # Everybody's a descendant of nullid
1162 1169 lowestrev = nullrev
1163 1170 if (lowestrev == nullrev) and (heads is None):
1164 1171 # We want _all_ the nodes!
1165 1172 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1166 1173 if heads is None:
1167 1174 # All nodes are ancestors, so the latest ancestor is the last
1168 1175 # node.
1169 1176 highestrev = len(self) - 1
1170 1177 # Set ancestors to None to signal that every node is an ancestor.
1171 1178 ancestors = None
1172 1179 # Set heads to an empty dictionary for later discovery of heads
1173 1180 heads = {}
1174 1181 else:
1175 1182 heads = list(heads)
1176 1183 if not heads:
1177 1184 return nonodes
1178 1185 ancestors = set()
1179 1186 # Turn heads into a dictionary so we can remove 'fake' heads.
1180 1187 # Also, later we will be using it to filter out the heads we can't
1181 1188 # find from roots.
1182 1189 heads = dict.fromkeys(heads, False)
1183 1190 # Start at the top and keep marking parents until we're done.
1184 1191 nodestotag = set(heads)
1185 1192 # Remember where the top was so we can use it as a limit later.
1186 1193 highestrev = max([self.rev(n) for n in nodestotag])
1187 1194 while nodestotag:
1188 1195 # grab a node to tag
1189 1196 n = nodestotag.pop()
1190 1197 # Never tag nullid
1191 1198 if n == nullid:
1192 1199 continue
1193 1200 # A node's revision number represents its place in a
1194 1201 # topologically sorted list of nodes.
1195 1202 r = self.rev(n)
1196 1203 if r >= lowestrev:
1197 1204 if n not in ancestors:
1198 1205 # If we are possibly a descendant of one of the roots
1199 1206 # and we haven't already been marked as an ancestor
1200 1207 ancestors.add(n) # Mark as ancestor
1201 1208 # Add non-nullid parents to list of nodes to tag.
1202 1209 nodestotag.update([p for p in self.parents(n) if
1203 1210 p != nullid])
1204 1211 elif n in heads: # We've seen it before, is it a fake head?
1205 1212 # So it is, real heads should not be the ancestors of
1206 1213 # any other heads.
1207 1214 heads.pop(n)
1208 1215 if not ancestors:
1209 1216 return nonodes
1210 1217 # Now that we have our set of ancestors, we want to remove any
1211 1218 # roots that are not ancestors.
1212 1219
1213 1220 # If one of the roots was nullid, everything is included anyway.
1214 1221 if lowestrev > nullrev:
1215 1222 # But, since we weren't, let's recompute the lowest rev to not
1216 1223 # include roots that aren't ancestors.
1217 1224
1218 1225 # Filter out roots that aren't ancestors of heads
1219 1226 roots = [root for root in roots if root in ancestors]
1220 1227 # Recompute the lowest revision
1221 1228 if roots:
1222 1229 lowestrev = min([self.rev(root) for root in roots])
1223 1230 else:
1224 1231 # No more roots? Return empty list
1225 1232 return nonodes
1226 1233 else:
1227 1234 # We are descending from nullid, and don't need to care about
1228 1235 # any other roots.
1229 1236 lowestrev = nullrev
1230 1237 roots = [nullid]
1231 1238 # Transform our roots list into a set.
1232 1239 descendants = set(roots)
1233 1240 # Also, keep the original roots so we can filter out roots that aren't
1234 1241 # 'real' roots (i.e. are descended from other roots).
1235 1242 roots = descendants.copy()
1236 1243 # Our topologically sorted list of output nodes.
1237 1244 orderedout = []
1238 1245 # Don't start at nullid since we don't want nullid in our output list,
1239 1246 # and if nullid shows up in descendants, empty parents will look like
1240 1247 # they're descendants.
1241 1248 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1242 1249 n = self.node(r)
1243 1250 isdescendant = False
1244 1251 if lowestrev == nullrev: # Everybody is a descendant of nullid
1245 1252 isdescendant = True
1246 1253 elif n in descendants:
1247 1254 # n is already a descendant
1248 1255 isdescendant = True
1249 1256 # This check only needs to be done here because all the roots
1250 1257 # will start being marked as descendants before the loop.
1251 1258 if n in roots:
1252 1259 # If n was a root, check if it's a 'real' root.
1253 1260 p = tuple(self.parents(n))
1254 1261 # If any of its parents are descendants, it's not a root.
1255 1262 if (p[0] in descendants) or (p[1] in descendants):
1256 1263 roots.remove(n)
1257 1264 else:
1258 1265 p = tuple(self.parents(n))
1259 1266 # A node is a descendant if either of its parents are
1260 1267 # descendants. (We seeded the descendants set with the roots
1261 1268 # up there, remember?)
1262 1269 if (p[0] in descendants) or (p[1] in descendants):
1263 1270 descendants.add(n)
1264 1271 isdescendant = True
1265 1272 if isdescendant and ((ancestors is None) or (n in ancestors)):
1266 1273 # Only include nodes that are both descendants and ancestors.
1267 1274 orderedout.append(n)
1268 1275 if (ancestors is not None) and (n in heads):
1269 1276 # We're trying to figure out which heads are reachable
1270 1277 # from roots.
1271 1278 # Mark this head as having been reached
1272 1279 heads[n] = True
1273 1280 elif ancestors is None:
1274 1281 # Otherwise, we're trying to discover the heads.
1275 1282 # Assume this is a head because if it isn't, the next step
1276 1283 # will eventually remove it.
1277 1284 heads[n] = True
1278 1285 # But, obviously its parents aren't.
1279 1286 for p in self.parents(n):
1280 1287 heads.pop(p, None)
1281 1288 heads = [head for head, flag in heads.iteritems() if flag]
1282 1289 roots = list(roots)
1283 1290 assert orderedout
1284 1291 assert roots
1285 1292 assert heads
1286 1293 return (orderedout, roots, heads)
1287 1294
1288 1295 def headrevs(self):
1289 1296 try:
1290 1297 return self.index.headrevs()
1291 1298 except AttributeError:
1292 1299 return self._headrevs()
1293 1300
1294 1301 def computephases(self, roots):
1295 1302 return self.index.computephasesmapsets(roots)
1296 1303
1297 1304 def _headrevs(self):
1298 1305 count = len(self)
1299 1306 if not count:
1300 1307 return [nullrev]
1301 1308 # we won't iter over filtered rev so nobody is a head at start
1302 1309 ishead = [0] * (count + 1)
1303 1310 index = self.index
1304 1311 for r in self:
1305 1312 ishead[r] = 1 # I may be a head
1306 1313 e = index[r]
1307 1314 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1308 1315 return [r for r, val in enumerate(ishead) if val]
1309 1316
1310 1317 def heads(self, start=None, stop=None):
1311 1318 """return the list of all nodes that have no children
1312 1319
1313 1320 if start is specified, only heads that are descendants of
1314 1321 start will be returned
1315 1322 if stop is specified, it will consider all the revs from stop
1316 1323 as if they had no children
1317 1324 """
1318 1325 if start is None and stop is None:
1319 1326 if not len(self):
1320 1327 return [nullid]
1321 1328 return [self.node(r) for r in self.headrevs()]
1322 1329
1323 1330 if start is None:
1324 1331 start = nullid
1325 1332 if stop is None:
1326 1333 stop = []
1327 1334 stoprevs = set([self.rev(n) for n in stop])
1328 1335 startrev = self.rev(start)
1329 1336 reachable = {startrev}
1330 1337 heads = {startrev}
1331 1338
1332 1339 parentrevs = self.parentrevs
1333 1340 for r in self.revs(start=startrev + 1):
1334 1341 for p in parentrevs(r):
1335 1342 if p in reachable:
1336 1343 if r not in stoprevs:
1337 1344 reachable.add(r)
1338 1345 heads.add(r)
1339 1346 if p in heads and p not in stoprevs:
1340 1347 heads.remove(p)
1341 1348
1342 1349 return [self.node(r) for r in heads]
1343 1350
1344 1351 def children(self, node):
1345 1352 """find the children of a given node"""
1346 1353 c = []
1347 1354 p = self.rev(node)
1348 1355 for r in self.revs(start=p + 1):
1349 1356 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1350 1357 if prevs:
1351 1358 for pr in prevs:
1352 1359 if pr == p:
1353 1360 c.append(self.node(r))
1354 1361 elif p == nullrev:
1355 1362 c.append(self.node(r))
1356 1363 return c
1357 1364
1358 1365 def descendant(self, start, end):
1359 1366 if start == nullrev:
1360 1367 return True
1361 1368 for i in self.descendants([start]):
1362 1369 if i == end:
1363 1370 return True
1364 1371 elif i > end:
1365 1372 break
1366 1373 return False
1367 1374
1368 1375 def commonancestorsheads(self, a, b):
1369 1376 """calculate all the heads of the common ancestors of nodes a and b"""
1370 1377 a, b = self.rev(a), self.rev(b)
1371 1378 try:
1372 1379 ancs = self.index.commonancestorsheads(a, b)
1373 1380 except (AttributeError, OverflowError): # C implementation failed
1374 1381 ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
1375 1382 return pycompat.maplist(self.node, ancs)
1376 1383
1377 1384 def isancestor(self, a, b):
1378 1385 """return True if node a is an ancestor of node b
1379 1386
1380 1387 The implementation of this is trivial but the use of
1381 1388 commonancestorsheads is not."""
1382 1389 return a in self.commonancestorsheads(a, b)
1383 1390
1384 1391 def ancestor(self, a, b):
1385 1392 """calculate the "best" common ancestor of nodes a and b"""
1386 1393
1387 1394 a, b = self.rev(a), self.rev(b)
1388 1395 try:
1389 1396 ancs = self.index.ancestors(a, b)
1390 1397 except (AttributeError, OverflowError):
1391 1398 ancs = ancestor.ancestors(self.parentrevs, a, b)
1392 1399 if ancs:
1393 1400 # choose a consistent winner when there's a tie
1394 1401 return min(map(self.node, ancs))
1395 1402 return nullid
1396 1403
1397 1404 def _match(self, id):
1398 1405 if isinstance(id, int):
1399 1406 # rev
1400 1407 return self.node(id)
1401 1408 if len(id) == 20:
1402 1409 # possibly a binary node
1403 1410 # odds of a binary node being all hex in ASCII are 1 in 10**25
1404 1411 try:
1405 1412 node = id
1406 1413 self.rev(node) # quick search the index
1407 1414 return node
1408 1415 except LookupError:
1409 1416 pass # may be partial hex id
1410 1417 try:
1411 1418 # str(rev)
1412 1419 rev = int(id)
1413 1420 if "%d" % rev != id:
1414 1421 raise ValueError
1415 1422 if rev < 0:
1416 1423 rev = len(self) + rev
1417 1424 if rev < 0 or rev >= len(self):
1418 1425 raise ValueError
1419 1426 return self.node(rev)
1420 1427 except (ValueError, OverflowError):
1421 1428 pass
1422 1429 if len(id) == 40:
1423 1430 try:
1424 1431 # a full hex nodeid?
1425 1432 node = bin(id)
1426 1433 self.rev(node)
1427 1434 return node
1428 1435 except (TypeError, LookupError):
1429 1436 pass
1430 1437
1431 1438 def _partialmatch(self, id):
1432 1439 maybewdir = wdirhex.startswith(id)
1433 1440 try:
1434 1441 partial = self.index.partialmatch(id)
1435 1442 if partial and self.hasnode(partial):
1436 1443 if maybewdir:
1437 1444 # single 'ff...' match in radix tree, ambiguous with wdir
1438 1445 raise RevlogError
1439 1446 return partial
1440 1447 if maybewdir:
1441 1448 # no 'ff...' match in radix tree, wdir identified
1442 1449 raise error.WdirUnsupported
1443 1450 return None
1444 1451 except RevlogError:
1445 1452 # parsers.c radix tree lookup gave multiple matches
1446 1453 # fast path: for unfiltered changelog, radix tree is accurate
1447 1454 if not getattr(self, 'filteredrevs', None):
1448 1455 raise LookupError(id, self.indexfile,
1449 1456 _('ambiguous identifier'))
1450 1457 # fall through to slow path that filters hidden revisions
1451 1458 except (AttributeError, ValueError):
1452 1459 # we are pure python, or key was too short to search radix tree
1453 1460 pass
1454 1461
1455 1462 if id in self._pcache:
1456 1463 return self._pcache[id]
1457 1464
1458 1465 if len(id) < 40:
1459 1466 try:
1460 1467 # hex(node)[:...]
1461 1468 l = len(id) // 2 # grab an even number of digits
1462 1469 prefix = bin(id[:l * 2])
1463 1470 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1464 1471 nl = [n for n in nl if hex(n).startswith(id) and
1465 1472 self.hasnode(n)]
1466 1473 if len(nl) > 0:
1467 1474 if len(nl) == 1 and not maybewdir:
1468 1475 self._pcache[id] = nl[0]
1469 1476 return nl[0]
1470 1477 raise LookupError(id, self.indexfile,
1471 1478 _('ambiguous identifier'))
1472 1479 if maybewdir:
1473 1480 raise error.WdirUnsupported
1474 1481 return None
1475 1482 except TypeError:
1476 1483 pass
1477 1484
1478 1485 def lookup(self, id):
1479 1486 """locate a node based on:
1480 1487 - revision number or str(revision number)
1481 1488 - nodeid or subset of hex nodeid
1482 1489 """
1483 1490 n = self._match(id)
1484 1491 if n is not None:
1485 1492 return n
1486 1493 n = self._partialmatch(id)
1487 1494 if n:
1488 1495 return n
1489 1496
1490 1497 raise LookupError(id, self.indexfile, _('no match found'))
1491 1498
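
A minimal usage sketch of the id forms accepted above; ``rl`` stands for any
revlog instance and is hypothetical, not part of this change (``hex`` is the
helper imported from mercurial.node):

    node = rl.lookup(0)               # revision number
    node = rl.lookup('0')             # str(revision number)
    node = rl.lookup(node)            # 20-byte binary nodeid
    node = rl.lookup(hex(node))       # full 40-digit hex nodeid
    node = rl.lookup(hex(node)[:8])   # unambiguous hex prefix
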
1492 1499 def shortest(self, hexnode, minlength=1):
1493 1500 """Find the shortest unambiguous prefix that matches hexnode."""
1494 1501 def isvalid(test):
1495 1502 try:
1496 1503 if self._partialmatch(test) is None:
1497 1504 return False
1498 1505
1499 1506 try:
1500 1507 i = int(test)
1501 1508 # a pure-int prefix would be parsed as a revision number, so
1502 1509 # it is only a valid node prefix if it starts with a zero
1503 1510 # or is larger than the tip revision
1504 1511 if test[0] == '0' or i > len(self):
1505 1512 return True
1506 1513 return False
1507 1514 except ValueError:
1508 1515 return True
1509 1516 except error.RevlogError:
1510 1517 return False
1511 1518 except error.WdirUnsupported:
1512 1519 # single 'ff...' match
1513 1520 return True
1514 1521
1515 1522 shortest = hexnode
1516 1523 startlength = max(6, minlength)
1517 1524 length = startlength
1518 1525 while True:
1519 1526 test = hexnode[:length]
1520 1527 if isvalid(test):
1521 1528 shortest = test
1522 1529 if length == minlength or length > startlength:
1523 1530 return shortest
1524 1531 length -= 1
1525 1532 else:
1526 1533 length += 1
1527 1534 if len(shortest) <= length:
1528 1535 return shortest
1529 1536
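
The search above starts at max(6, minlength) digits, shrinks the prefix while
it stays unambiguous, and grows it again once it becomes ambiguous. A hedged
sketch of the resulting invariant, assuming a revlog ``rl`` and a full hex
node ``h`` that is not hidden:

    prefix = rl.shortest(h)
    assert rl._partialmatch(prefix) == bin(h)   # prefix is unambiguous
    # any shorter prefix of at least minlength digits would be ambiguous
    # or could be mistaken for an integer revision number
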
1530 1537 def cmp(self, node, text):
1531 1538 """compare text with a given file revision
1532 1539
1533 1540 returns True if text is different than what is stored.
1534 1541 """
1535 1542 p1, p2 = self.parents(node)
1536 1543 return hash(text, p1, p2) != node
1537 1544
1538 1545 def _cachesegment(self, offset, data):
1539 1546 """Add a segment to the revlog cache.
1540 1547
1541 1548 Accepts an absolute offset and the data that is at that location.
1542 1549 """
1543 1550 o, d = self._chunkcache
1544 1551 # try to add to existing cache
1545 1552 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1546 1553 self._chunkcache = o, d + data
1547 1554 else:
1548 1555 self._chunkcache = offset, data
1549 1556
1550 1557 def _readsegment(self, offset, length, df=None):
1551 1558 """Load a segment of raw data from the revlog.
1552 1559
1553 1560 Accepts an absolute offset, length to read, and an optional existing
1554 1561 file handle to read from.
1555 1562
1556 1563 If an existing file handle is passed, it will be seeked and the
1557 1564 original seek position will NOT be restored.
1558 1565
1559 1566 Returns a str or buffer of raw byte data.
1560 1567 """
1561 1568 # Cache data both forward and backward around the requested
1562 1569 # data, in a fixed size window. This helps speed up operations
1563 1570 # involving reading the revlog backwards.
1564 1571 cachesize = self._chunkcachesize
1565 1572 realoffset = offset & ~(cachesize - 1)
1566 1573 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1567 1574 - realoffset)
1568 1575 with self._datareadfp(df) as df:
1569 1576 df.seek(realoffset)
1570 1577 d = df.read(reallength)
1571 1578 self._cachesegment(realoffset, d)
1572 1579 if offset != realoffset or reallength != length:
1573 1580 return util.buffer(d, offset - realoffset, length)
1574 1581 return d
1575 1582
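
The bit-masking above aligns the read window to multiples of ``cachesize``,
which must be a power of two. A worked example, assuming a 64KB window:

    cachesize = 65536
    offset, length = 70000, 100
    realoffset = offset & ~(cachesize - 1)            # 65536
    reallength = (((offset + length + cachesize)
                   & ~(cachesize - 1)) - realoffset)  # 65536
    # the window [65536, 131072) covers the request; the requested bytes
    # are sliced back out with util.buffer(d, offset - realoffset, length)
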
1576 1583 def _getsegment(self, offset, length, df=None):
1577 1584 """Obtain a segment of raw data from the revlog.
1578 1585
1579 1586 Accepts an absolute offset, length of bytes to obtain, and an
1580 1587 optional file handle to the already-opened revlog. If the file
1581 1588 handle is used, its original seek position will not be preserved.
1582 1589
1583 1590 Requests for data may be returned from a cache.
1584 1591
1585 1592 Returns a str or a buffer instance of raw byte data.
1586 1593 """
1587 1594 o, d = self._chunkcache
1588 1595 l = len(d)
1589 1596
1590 1597 # is it in the cache?
1591 1598 cachestart = offset - o
1592 1599 cacheend = cachestart + length
1593 1600 if cachestart >= 0 and cacheend <= l:
1594 1601 if cachestart == 0 and cacheend == l:
1595 1602 return d # avoid a copy
1596 1603 return util.buffer(d, cachestart, cacheend - cachestart)
1597 1604
1598 1605 return self._readsegment(offset, length, df=df)
1599 1606
1600 1607 def _getsegmentforrevs(self, startrev, endrev, df=None):
1601 1608 """Obtain a segment of raw data corresponding to a range of revisions.
1602 1609
1603 1610 Accepts the start and end revisions and an optional already-open
1604 1611 file handle to be used for reading. If the file handle is read, its
1605 1612 seek position will not be preserved.
1606 1613
1607 1614 Requests for data may be satisfied by a cache.
1608 1615
1609 1616 Returns a 2-tuple of (offset, data) for the requested range of
1610 1617 revisions. Offset is the integer offset from the beginning of the
1611 1618 revlog and data is a str or buffer of the raw byte data.
1612 1619
1613 1620 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1614 1621 to determine where each revision's data begins and ends.
1615 1622 """
1616 1623 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1617 1624 # (functions are expensive).
1618 1625 index = self.index
1619 1626 istart = index[startrev]
1620 1627 start = int(istart[0] >> 16)
1621 1628 if startrev == endrev:
1622 1629 end = start + istart[1]
1623 1630 else:
1624 1631 iend = index[endrev]
1625 1632 end = int(iend[0] >> 16) + iend[1]
1626 1633
1627 1634 if self._inline:
1628 1635 start += (startrev + 1) * self._io.size
1629 1636 end += (endrev + 1) * self._io.size
1630 1637 length = end - start
1631 1638
1632 1639 return start, self._getsegment(start, length, df=df)
1633 1640
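
For inline revlogs the index entries and data chunks are interleaved in one
file, so the physical position of a revision's data is shifted by
``(rev + 1)`` index-entry sizes. A worked example, assuming the 64-byte
revlogng entry size for ``self._io.size``:

    iosize = 64                   # struct size of one index entry
    startrev = 5
    start, length = 1000, 200     # logical offsets from the index
    physical = start + (startrev + 1) * iosize   # 1000 + 384 == 1384
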
1634 1641 def _chunk(self, rev, df=None):
1635 1642 """Obtain a single decompressed chunk for a revision.
1636 1643
1637 1644 Accepts an integer revision and an optional already-open file handle
1638 1645 to be used for reading. If used, the seek position of the file will not
1639 1646 be preserved.
1640 1647
1641 1648 Returns a str holding uncompressed data for the requested revision.
1642 1649 """
1643 1650 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1644 1651
1645 1652 def _chunks(self, revs, df=None):
1646 1653 """Obtain decompressed chunks for the specified revisions.
1647 1654
1648 1655 Accepts an iterable of numeric revisions that are assumed to be in
1649 1656 ascending order. Also accepts an optional already-open file handle
1650 1657 to be used for reading. If used, the seek position of the file will
1651 1658 not be preserved.
1652 1659
1653 1660 This function is similar to calling ``self._chunk()`` multiple times,
1654 1661 but is faster.
1655 1662
1656 1663 Returns a list with decompressed data for each requested revision.
1657 1664 """
1658 1665 if not revs:
1659 1666 return []
1660 1667 start = self.start
1661 1668 length = self.length
1662 1669 inline = self._inline
1663 1670 iosize = self._io.size
1664 1671 buffer = util.buffer
1665 1672
1666 1673 l = []
1667 1674 ladd = l.append
1668 1675
1669 1676 if not self._withsparseread:
1670 1677 slicedchunks = (revs,)
1671 1678 else:
1672 1679 slicedchunks = _slicechunk(self, revs)
1673 1680
1674 1681 for revschunk in slicedchunks:
1675 1682 firstrev = revschunk[0]
1676 1683 # Skip trailing revisions with empty diff
1677 1684 for lastrev in revschunk[::-1]:
1678 1685 if length(lastrev) != 0:
1679 1686 break
1680 1687
1681 1688 try:
1682 1689 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1683 1690 except OverflowError:
1684 1691 # issue4215 - we can't cache a run of chunks greater than
1685 1692 # 2G on Windows
1686 1693 return [self._chunk(rev, df=df) for rev in revschunk]
1687 1694
1688 1695 decomp = self.decompress
1689 1696 for rev in revschunk:
1690 1697 chunkstart = start(rev)
1691 1698 if inline:
1692 1699 chunkstart += (rev + 1) * iosize
1693 1700 chunklength = length(rev)
1694 1701 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1695 1702
1696 1703 return l
1697 1704
1698 1705 def _chunkclear(self):
1699 1706 """Clear the raw chunk cache."""
1700 1707 self._chunkcache = (0, '')
1701 1708
1702 1709 def deltaparent(self, rev):
1703 1710 """return deltaparent of the given revision"""
1704 1711 base = self.index[rev][3]
1705 1712 if base == rev:
1706 1713 return nullrev
1707 1714 elif self._generaldelta:
1708 1715 return base
1709 1716 else:
1710 1717 return rev - 1
1711 1718
1712 1719 def revdiff(self, rev1, rev2):
1713 1720 """return or calculate a delta between two revisions
1714 1721
1715 1722 The delta calculated is in binary form and is intended to be written to
1716 1723 revlog data directly. So this function needs raw revision data.
1717 1724 """
1718 1725 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1719 1726 return bytes(self._chunk(rev2))
1720 1727
1721 1728 return mdiff.textdiff(self.revision(rev1, raw=True),
1722 1729 self.revision(rev2, raw=True))
1723 1730
1724 1731 def revision(self, nodeorrev, _df=None, raw=False):
1725 1732 """return an uncompressed revision of a given node or revision
1726 1733 number.
1727 1734
1728 1735 _df - an existing file handle to read from. (internal-only)
1729 1736 raw - an optional argument specifying if the revision data is to be
1730 1737 treated as raw data when applying flag transforms. 'raw' should be set
1731 1738 to True when generating changegroups or in debug commands.
1732 1739 """
1733 1740 if isinstance(nodeorrev, int):
1734 1741 rev = nodeorrev
1735 1742 node = self.node(rev)
1736 1743 else:
1737 1744 node = nodeorrev
1738 1745 rev = None
1739 1746
1740 1747 cachedrev = None
1741 1748 flags = None
1742 1749 rawtext = None
1743 1750 if node == nullid:
1744 1751 return ""
1745 1752 if self._cache:
1746 1753 if self._cache[0] == node:
1747 1754 # _cache only stores rawtext
1748 1755 if raw:
1749 1756 return self._cache[2]
1750 1757 # duplicated, but good for perf
1751 1758 if rev is None:
1752 1759 rev = self.rev(node)
1753 1760 if flags is None:
1754 1761 flags = self.flags(rev)
1755 1762 # no extra flags set, no flag processor runs, text = rawtext
1756 1763 if flags == REVIDX_DEFAULT_FLAGS:
1757 1764 return self._cache[2]
1758 1765 # rawtext is reusable. need to run flag processor
1759 1766 rawtext = self._cache[2]
1760 1767
1761 1768 cachedrev = self._cache[1]
1762 1769
1763 1770 # look up what we need to read
1764 1771 if rawtext is None:
1765 1772 if rev is None:
1766 1773 rev = self.rev(node)
1767 1774
1768 1775 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1769 1776 if stopped:
1770 1777 rawtext = self._cache[2]
1771 1778
1772 1779 # drop cache to save memory
1773 1780 self._cache = None
1774 1781
1775 1782 bins = self._chunks(chain, df=_df)
1776 1783 if rawtext is None:
1777 1784 rawtext = bytes(bins[0])
1778 1785 bins = bins[1:]
1779 1786
1780 1787 rawtext = mdiff.patches(rawtext, bins)
1781 1788 self._cache = (node, rev, rawtext)
1782 1789
1783 1790 if flags is None:
1784 1791 if rev is None:
1785 1792 rev = self.rev(node)
1786 1793 flags = self.flags(rev)
1787 1794
1788 1795 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1789 1796 if validatehash:
1790 1797 self.checkhash(text, node, rev=rev)
1791 1798
1792 1799 return text
1793 1800
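
Stripped of caching and flag processing, the reconstruction above is a fold
of mdiff.patches over the delta chain. A simplified sketch, with ``rl`` a
hypothetical revlog and ``rev`` a known revision:

    chain, stopped = rl._deltachain(rev)   # revs, base fulltext first
    chunks = rl._chunks(chain)             # one decompressed chunk per rev
    rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])
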
1794 1801 def hash(self, text, p1, p2):
1795 1802 """Compute a node hash.
1796 1803
1797 1804 Available as a function so that subclasses can replace the hash
1798 1805 as needed.
1799 1806 """
1800 1807 return hash(text, p1, p2)
1801 1808
1802 1809 def _processflags(self, text, flags, operation, raw=False):
1803 1810 """Inspect revision data flags and applies transforms defined by
1804 1811 registered flag processors.
1805 1812
1806 1813 ``text`` - the revision data to process
1807 1814 ``flags`` - the revision flags
1808 1815 ``operation`` - the operation being performed (read or write)
1809 1816 ``raw`` - an optional argument describing if the raw transform should be
1810 1817 applied.
1811 1818
1812 1819 This method processes the flags in the order (or reverse order if
1813 1820 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1814 1821 flag processors registered for present flags. The order of flags defined
1815 1822 in REVIDX_FLAGS_ORDER needs to be stable because the transforms are
1815 1822 not commutative.
1816 1823
1817 1824 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1818 1825 processed text and ``validatehash`` is a bool indicating whether the
1819 1826 returned text should be checked for hash integrity.
1820 1827
1821 1828 Note: If the ``raw`` argument is set, it has precedence over the
1822 1829 operation and will only update the value of ``validatehash``.
1823 1830 """
1824 1831 # fast path: no flag processors will run
1825 1832 if flags == 0:
1826 1833 return text, True
1827 1834 if operation not in ('read', 'write'):
1828 1835 raise ProgrammingError(_("invalid '%s' operation") % operation)
1829 1836 # Check all flags are known.
1830 1837 if flags & ~REVIDX_KNOWN_FLAGS:
1831 1838 raise RevlogError(_("incompatible revision flag '%#x'") %
1832 1839 (flags & ~REVIDX_KNOWN_FLAGS))
1833 1840 validatehash = True
1834 1841 # Depending on the operation (read or write), the order might be
1835 1842 # reversed due to non-commutative transforms.
1836 1843 orderedflags = REVIDX_FLAGS_ORDER
1837 1844 if operation == 'write':
1838 1845 orderedflags = reversed(orderedflags)
1839 1846
1840 1847 for flag in orderedflags:
1841 1848 # If a flagprocessor has been registered for a known flag, apply the
1842 1849 # related operation transform and update result tuple.
1843 1850 if flag & flags:
1844 1851 vhash = True
1845 1852
1846 1853 if flag not in _flagprocessors:
1847 1854 message = _("missing processor for flag '%#x'") % (flag)
1848 1855 raise RevlogError(message)
1849 1856
1850 1857 processor = _flagprocessors[flag]
1851 1858 if processor is not None:
1852 1859 readtransform, writetransform, rawtransform = processor
1853 1860
1854 1861 if raw:
1855 1862 vhash = rawtransform(self, text)
1856 1863 elif operation == 'read':
1857 1864 text, vhash = readtransform(self, text)
1858 1865 else: # write operation
1859 1866 text, vhash = writetransform(self, text)
1860 1867 validatehash = validatehash and vhash
1861 1868
1862 1869 return text, validatehash
1863 1870
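
For reference, a registered processor is the 3-tuple of transforms consumed
above. A hedged sketch of the expected shape; the flag choice and the
``decode``/``encode`` helpers are illustrative only, and registration goes
through the module-level addflagprocessor():

    def _readtransform(rl, text):
        return decode(text), True     # (transformed text, validatehash)

    def _writetransform(rl, text):
        return encode(text), True

    def _rawtransform(rl, rawtext):
        return False                  # rawtext hash cannot be validated

    addflagprocessor(REVIDX_EXTSTORED,
                     (_readtransform, _writetransform, _rawtransform))
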
1864 1871 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1865 1872 """Check node hash integrity.
1866 1873
1867 1874 Available as a function so that subclasses can extend hash mismatch
1868 1875 behaviors as needed.
1869 1876 """
1877 try:
1870 1878 if p1 is None and p2 is None:
1871 1879 p1, p2 = self.parents(node)
1872 1880 if node != self.hash(text, p1, p2):
1873 1881 revornode = rev
1874 1882 if revornode is None:
1875 1883 revornode = templatefilters.short(hex(node))
1876 1884 raise RevlogError(_("integrity check failed on %s:%s")
1877 1885 % (self.indexfile, pycompat.bytestr(revornode)))
1886 except RevlogError:
1887 if self._censorable and _censoredtext(text):
1888 raise error.CensoredNodeError(self.indexfile, node, text)
1889 raise
1878 1890
1879 1891 def _enforceinlinesize(self, tr, fp=None):
1880 1892 """Check if the revlog is too big for inline and convert if so.
1881 1893
1882 1894 This should be called after revisions are added to the revlog. If the
1883 1895 revlog has grown too large to be an inline revlog, it will convert it
1884 1896 to use multiple index and data files.
1885 1897 """
1886 1898 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1887 1899 return
1888 1900
1889 1901 trinfo = tr.find(self.indexfile)
1890 1902 if trinfo is None:
1891 1903 raise RevlogError(_("%s not found in the transaction")
1892 1904 % self.indexfile)
1893 1905
1894 1906 trindex = trinfo[2]
1895 1907 if trindex is not None:
1896 1908 dataoff = self.start(trindex)
1897 1909 else:
1898 1910 # revlog was stripped at start of transaction, use all leftover data
1899 1911 trindex = len(self) - 1
1900 1912 dataoff = self.end(-2)
1901 1913
1902 1914 tr.add(self.datafile, dataoff)
1903 1915
1904 1916 if fp:
1905 1917 fp.flush()
1906 1918 fp.close()
1907 1919
1908 1920 with self._datafp('w') as df:
1909 1921 for r in self:
1910 1922 df.write(self._getsegmentforrevs(r, r)[1])
1911 1923
1912 1924 with self._indexfp('w') as fp:
1913 1925 self.version &= ~FLAG_INLINE_DATA
1914 1926 self._inline = False
1915 1927 io = self._io
1916 1928 for i in self:
1917 1929 e = io.packentry(self.index[i], self.node, self.version, i)
1918 1930 fp.write(e)
1919 1931
1920 1932 # the temp file replaces the real index when we exit the context
1921 1933 # manager
1922 1934
1923 1935 tr.replace(self.indexfile, trindex * self._io.size)
1924 1936 self._chunkclear()
1925 1937
1926 1938 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1927 1939 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1928 1940 """add a revision to the log
1929 1941
1930 1942 text - the revision data to add
1931 1943 transaction - the transaction object used for rollback
1932 1944 link - the linkrev data to add
1933 1945 p1, p2 - the parent nodeids of the revision
1934 1946 cachedelta - an optional precomputed delta
1935 1947 node - nodeid of revision; typically node is not specified, and it is
1936 1948 computed by default as hash(text, p1, p2); however, subclasses might
1937 1949 use a different hashing method (and override checkhash() in that case)
1938 1950 flags - the known flags to set on the revision
1939 1951 deltacomputer - an optional _deltacomputer instance shared between
1940 1952 multiple calls
1941 1953 """
1942 1954 if link == nullrev:
1943 1955 raise RevlogError(_("attempted to add linkrev -1 to %s")
1944 1956 % self.indexfile)
1945 1957
1946 1958 if flags:
1947 1959 node = node or self.hash(text, p1, p2)
1948 1960
1949 1961 rawtext, validatehash = self._processflags(text, flags, 'write')
1950 1962
1951 1963 # If the flag processor modifies the revision data, ignore any provided
1952 1964 # cachedelta.
1953 1965 if rawtext != text:
1954 1966 cachedelta = None
1955 1967
1956 1968 if len(rawtext) > _maxentrysize:
1957 1969 raise RevlogError(
1958 1970 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1959 1971 % (self.indexfile, len(rawtext)))
1960 1972
1961 1973 node = node or self.hash(rawtext, p1, p2)
1962 1974 if node in self.nodemap:
1963 1975 return node
1964 1976
1965 1977 if validatehash:
1966 1978 self.checkhash(rawtext, node, p1=p1, p2=p2)
1967 1979
1968 1980 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1969 1981 flags, cachedelta=cachedelta,
1970 1982 deltacomputer=deltacomputer)
1971 1983
1972 1984 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1973 1985 cachedelta=None, deltacomputer=None):
1974 1986 """add a raw revision with known flags, node and parents
1975 1987 useful when reusing a revision not stored in this revlog (e.g. received
1976 1988 over the wire, or read from an external bundle).
1977 1989 """
1978 1990 dfh = None
1979 1991 if not self._inline:
1980 1992 dfh = self._datafp("a+")
1981 1993 ifh = self._indexfp("a+")
1982 1994 try:
1983 1995 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1984 1996 flags, cachedelta, ifh, dfh,
1985 1997 deltacomputer=deltacomputer)
1986 1998 finally:
1987 1999 if dfh:
1988 2000 dfh.close()
1989 2001 ifh.close()
1990 2002
1991 2003 def compress(self, data):
1992 2004 """Generate a possibly-compressed representation of data."""
1993 2005 if not data:
1994 2006 return '', data
1995 2007
1996 2008 compressed = self._compressor.compress(data)
1997 2009
1998 2010 if compressed:
1999 2011 # The revlog compressor added the header in the returned data.
2000 2012 return '', compressed
2001 2013
2002 2014 if data[0:1] == '\0':
2003 2015 return '', data
2004 2016 return 'u', data
2005 2017
2006 2018 def decompress(self, data):
2007 2019 """Decompress a revlog chunk.
2008 2020
2009 2021 The chunk is expected to begin with a header identifying the
2010 2022 format type so it can be routed to an appropriate decompressor.
2011 2023 """
2012 2024 if not data:
2013 2025 return data
2014 2026
2015 2027 # Revlogs are read much more frequently than they are written and many
2016 2028 # chunks only take microseconds to decompress, so performance is
2017 2029 # important here.
2018 2030 #
2019 2031 # We can make a few assumptions about revlogs:
2020 2032 #
2021 2033 # 1) the majority of chunks will be compressed (as opposed to inline
2022 2034 # raw data).
2023 2035 # 2) decompressing *any* data will likely be at least 10x slower than
2024 2036 # returning raw inline data.
2025 2037 # 3) we want to prioritize common and officially supported compression
2026 2038 # engines
2027 2039 #
2028 2040 # It follows that we want to optimize for the "decompress compressed data
2029 2041 # when encoded with common and officially supported compression engines"
2030 2042 # case over "raw data" and "data encoded by less common or non-official
2031 2043 # compression engines." That is why we have the inline lookup first
2032 2044 # followed by the compengines lookup.
2033 2045 #
2034 2046 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2035 2047 # compressed chunks. And this matters for changelog and manifest reads.
2036 2048 t = data[0:1]
2037 2049
2038 2050 if t == 'x':
2039 2051 try:
2040 2052 return _zlibdecompress(data)
2041 2053 except zlib.error as e:
2042 2054 raise RevlogError(_('revlog decompress error: %s') %
2043 2055 stringutil.forcebytestr(e))
2044 2056 # '\0' is more common than 'u' so it goes first.
2045 2057 elif t == '\0':
2046 2058 return data
2047 2059 elif t == 'u':
2048 2060 return util.buffer(data, 1)
2049 2061
2050 2062 try:
2051 2063 compressor = self._decompressors[t]
2052 2064 except KeyError:
2053 2065 try:
2054 2066 engine = util.compengines.forrevlogheader(t)
2055 2067 compressor = engine.revlogcompressor()
2056 2068 self._decompressors[t] = compressor
2057 2069 except KeyError:
2058 2070 raise RevlogError(_('unknown compression type %r') % t)
2059 2071
2060 2072 return compressor.decompress(data)
2061 2073
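
A hedged round-trip sketch tying compress() and decompress() together, with
``rl`` a hypothetical revlog; a short text like this one falls back to the
'u' literal header, since compressing it would not help:

    header, packed = rl.compress(b'some revision text')
    assert bytes(rl.decompress(header + packed)) == b'some revision text'
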
2062 2074 def _isgooddeltainfo(self, d, textlen):
2063 2075 """Returns True if the given delta is good. Good means that it is within
2064 2076 the disk span, disk size, and chain length bounds that we know to be
2065 2077 performant."""
2066 2078 if d is None:
2067 2079 return False
2068 2080
2069 2081 # - 'd.distance' is the distance from the base revision -- bounding it
2070 2082 # limits the amount of I/O we need to do.
2071 2083 # - 'd.compresseddeltalen' is the sum of the total size of deltas we
2072 2084 # need to apply -- bounding it limits the amount of CPU we consume.
2073 2085
2074 2086 defaultmax = textlen * 4
2075 2087 maxdist = self._maxdeltachainspan
2076 2088 if not maxdist:
2077 2089 maxdist = d.distance # ensure the conditional below passes
2078 2090 maxdist = max(maxdist, defaultmax)
2079 2091 if (d.distance > maxdist or d.deltalen > textlen or
2080 2092 d.compresseddeltalen > textlen * 2 or
2081 2093 (self._maxchainlen and d.chainlen > self._maxchainlen)):
2082 2094 return False
2083 2095
2084 2096 return True
2085 2097
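
A worked example of the bounds above for a 1000-byte revision, assuming
_maxdeltachainspan and _maxchainlen are unset:

    textlen = 1000
    defaultmax = textlen * 4            # 4000
    # with _maxdeltachainspan unset, maxdist = max(d.distance, 4000),
    # so the distance test can never fire; a candidate deltainfo d is
    # then rejected when either:
    #   d.deltalen > 1000            - delta larger than the fulltext
    #   d.compresseddeltalen > 2000  - chain costs over 2x the fulltext
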
2086 2098 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2087 2099 cachedelta, ifh, dfh, alwayscache=False,
2088 2100 deltacomputer=None):
2089 2101 """internal function to add revisions to the log
2090 2102
2091 2103 see addrevision for argument descriptions.
2092 2104
2093 2105 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2094 2106
2095 2107 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2096 2108 be used.
2097 2109
2098 2110 invariants:
2099 2111 - rawtext is optional (can be None); if not set, cachedelta must be set.
2100 2112 if both are set, they must correspond to each other.
2101 2113 """
2102 2114 if node == nullid:
2103 2115 raise RevlogError(_("%s: attempt to add null revision") %
2104 2116 (self.indexfile))
2105 2117 if node == wdirid:
2106 2118 raise RevlogError(_("%s: attempt to add wdir revision") %
2107 2119 (self.indexfile))
2108 2120
2109 2121 if self._inline:
2110 2122 fh = ifh
2111 2123 else:
2112 2124 fh = dfh
2113 2125
2114 2126 btext = [rawtext]
2115 2127
2116 2128 curr = len(self)
2117 2129 prev = curr - 1
2118 2130 offset = self.end(prev)
2119 2131 p1r, p2r = self.rev(p1), self.rev(p2)
2120 2132
2121 2133 # full versions are inserted when the needed deltas
2122 2134 # become comparable to the uncompressed text
2123 2135 if rawtext is None:
2124 2136 # need rawtext size, before changed by flag processors, which is
2125 2137 # the non-raw size. use revlog explicitly to avoid filelog's extra
2126 2138 # logic that might remove metadata size.
2127 2139 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2128 2140 cachedelta[1])
2129 2141 else:
2130 2142 textlen = len(rawtext)
2131 2143
2132 2144 if deltacomputer is None:
2133 2145 deltacomputer = _deltacomputer(self)
2134 2146
2135 2147 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2136 2148
2137 2149 # no delta for flag processor revision (see "candelta" for why)
2138 2150 # not calling candelta since only one revision needs testing, and to
2139 2151 # avoid the overhead of fetching the flags again.
2140 2152 if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
2141 2153 deltainfo = None
2142 2154 else:
2143 2155 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2144 2156
2145 2157 if deltainfo is not None:
2146 2158 base = deltainfo.base
2147 2159 chainbase = deltainfo.chainbase
2148 2160 data = deltainfo.data
2149 2161 l = deltainfo.deltalen
2150 2162 else:
2151 2163 rawtext = deltacomputer.buildtext(revinfo, fh)
2152 2164 data = self.compress(rawtext)
2153 2165 l = len(data[1]) + len(data[0])
2154 2166 base = chainbase = curr
2155 2167
2156 2168 e = (offset_type(offset, flags), l, textlen,
2157 2169 base, link, p1r, p2r, node)
2158 2170 self.index.insert(-1, e)
2159 2171 self.nodemap[node] = curr
2160 2172
2161 2173 entry = self._io.packentry(e, self.node, self.version, curr)
2162 2174 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
2163 2175
2164 2176 if alwayscache and rawtext is None:
2165 2177 rawtext = deltacomputer.buildtext(revinfo, fh)
2166 2178
2167 2179 if type(rawtext) == bytes: # only accept immutable objects
2168 2180 self._cache = (node, curr, rawtext)
2169 2181 self._chainbasecache[curr] = chainbase
2170 2182 return node
2171 2183
2172 2184 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2173 2185 # Files opened in a+ mode have inconsistent behavior on various
2174 2186 # platforms. Windows requires that a file positioning call be made
2175 2187 # when the file handle transitions between reads and writes. See
2176 2188 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2177 2189 # platforms, Python or the platform itself can be buggy. Some versions
2178 2190 # of Solaris have been observed to not append at the end of the file
2179 2191 # if the file was seeked to before the end. See issue4943 for more.
2180 2192 #
2181 2193 # We work around this issue by inserting a seek() before writing.
2182 2194 # Note: This is likely not necessary on Python 3.
2183 2195 ifh.seek(0, os.SEEK_END)
2184 2196 if dfh:
2185 2197 dfh.seek(0, os.SEEK_END)
2186 2198
2187 2199 curr = len(self) - 1
2188 2200 if not self._inline:
2189 2201 transaction.add(self.datafile, offset)
2190 2202 transaction.add(self.indexfile, curr * len(entry))
2191 2203 if data[0]:
2192 2204 dfh.write(data[0])
2193 2205 dfh.write(data[1])
2194 2206 ifh.write(entry)
2195 2207 else:
2196 2208 offset += curr * self._io.size
2197 2209 transaction.add(self.indexfile, offset, curr)
2198 2210 ifh.write(entry)
2199 2211 ifh.write(data[0])
2200 2212 ifh.write(data[1])
2201 2213 self._enforceinlinesize(transaction, ifh)
2202 2214
2203 2215 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2204 2216 """
2205 2217 add a delta group
2206 2218
2207 2219 given a set of deltas, add them to the revision log. the
2208 2220 first delta is against its parent, which should be in our
2209 2221 log, the rest are against the previous delta.
2210 2222
2211 2223 If ``addrevisioncb`` is defined, it will be called with arguments of
2212 2224 this revlog and the node that was added.
2213 2225 """
2214 2226
2215 2227 nodes = []
2216 2228
2217 2229 r = len(self)
2218 2230 end = 0
2219 2231 if r:
2220 2232 end = self.end(r - 1)
2221 2233 ifh = self._indexfp("a+")
2222 2234 isize = r * self._io.size
2223 2235 if self._inline:
2224 2236 transaction.add(self.indexfile, end + isize, r)
2225 2237 dfh = None
2226 2238 else:
2227 2239 transaction.add(self.indexfile, isize, r)
2228 2240 transaction.add(self.datafile, end)
2229 2241 dfh = self._datafp("a+")
2230 2242 def flush():
2231 2243 if dfh:
2232 2244 dfh.flush()
2233 2245 ifh.flush()
2234 2246 try:
2235 2247 deltacomputer = _deltacomputer(self)
2236 2248 # loop through our set of deltas
2237 2249 for data in deltas:
2238 2250 node, p1, p2, linknode, deltabase, delta, flags = data
2239 2251 link = linkmapper(linknode)
2240 2252 flags = flags or REVIDX_DEFAULT_FLAGS
2241 2253
2242 2254 nodes.append(node)
2243 2255
2244 2256 if node in self.nodemap:
2245 2257 # this can happen if two branches make the same change
2246 2258 continue
2247 2259
2248 2260 for p in (p1, p2):
2249 2261 if p not in self.nodemap:
2250 2262 raise LookupError(p, self.indexfile,
2251 2263 _('unknown parent'))
2252 2264
2253 2265 if deltabase not in self.nodemap:
2254 2266 raise LookupError(deltabase, self.indexfile,
2255 2267 _('unknown delta base'))
2256 2268
2257 2269 baserev = self.rev(deltabase)
2258 2270
2259 2271 if baserev != nullrev and self.iscensored(baserev):
2260 2272 # if base is censored, delta must be full replacement in a
2261 2273 # single patch operation
2262 2274 hlen = struct.calcsize(">lll")
2263 2275 oldlen = self.rawsize(baserev)
2264 2276 newlen = len(delta) - hlen
2265 2277 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2266 2278 raise error.CensoredBaseError(self.indexfile,
2267 2279 self.node(baserev))
2268 2280
2269 2281 if not flags and self._peek_iscensored(baserev, delta, flush):
2270 2282 flags |= REVIDX_ISCENSORED
2271 2283
2272 2284 # We assume consumers of addrevisioncb will want to retrieve
2273 2285 # the added revision, which will require a call to
2274 2286 # revision(). revision() will fast path if there is a cache
2275 2287 # hit. So, we tell _addrevision() to always cache in this case.
2276 2288 # We're only using addgroup() in the context of changegroup
2277 2289 # generation so the revision data can always be handled as raw
2278 2290 # by the flagprocessor.
2279 2291 self._addrevision(node, None, transaction, link,
2280 2292 p1, p2, flags, (baserev, delta),
2281 2293 ifh, dfh,
2282 2294 alwayscache=bool(addrevisioncb),
2283 2295 deltacomputer=deltacomputer)
2284 2296
2285 2297 if addrevisioncb:
2286 2298 addrevisioncb(self, node)
2287 2299
2288 2300 if not dfh and not self._inline:
2289 2301 # addrevision switched from inline to conventional
2290 2302 # reopen the index
2291 2303 ifh.close()
2292 2304 dfh = self._datafp("a+")
2293 2305 ifh = self._indexfp("a+")
2294 2306 finally:
2295 2307 if dfh:
2296 2308 dfh.close()
2297 2309 ifh.close()
2298 2310
2299 2311 return nodes
2300 2312
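
For reference, each element of ``deltas`` consumed above unpacks as a
7-tuple; a sketch of the expected shape (values illustrative):

    node, p1, p2, linknode, deltabase, delta, flags = data
    # node      - nodeid of the revision being added
    # p1, p2    - parent nodeids, which must already be in this revlog
    # linknode  - node passed through linkmapper() to obtain the linkrev
    # deltabase - nodeid the delta applies against (must also be known)
    # delta     - the binary patch data
    # flags     - revision flags, or 0 for REVIDX_DEFAULT_FLAGS
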
2301 2313 def iscensored(self, rev):
2302 2314 """Check if a file revision is censored."""
2315 if not self._censorable:
2303 2316 return False
2304 2317
2318 return self.flags(rev) & REVIDX_ISCENSORED
2319
2305 2320 def _peek_iscensored(self, baserev, delta, flush):
2306 2321 """Quickly check if a delta produces a censored revision."""
2322 if not self._censorable:
2307 2323 return False
2308 2324
2325 # Fragile heuristic: unless new file meta keys are added alphabetically
2326 # preceding "censored", all censored revisions are prefixed by
2327 # "\1\ncensored:". A delta producing such a censored revision must be a
2328 # full-replacement delta, so we inspect the first and only patch in the
2329 # delta for this prefix.
2330 hlen = struct.calcsize(">lll")
2331 if len(delta) <= hlen:
2332 return False
2333
2334 oldlen = self.rawsize(baserev)
2335 newlen = len(delta) - hlen
2336 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2337 return False
2338
2339 add = "\1\ncensored:"
2340 addlen = len(add)
2341 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2342
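
The full-replacement delta that the heuristic accepts patches the whole base
text in a single hunk; a sketch of constructing such a delta, assuming a
censor tombstone in ``newtext`` (contents elided):

    import struct
    from mercurial import mdiff
    oldlen = 512                        # rawsize of the base revision
    newtext = "\1\ncensored:..."        # tombstone text, elided
    delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    assert delta[:12] == mdiff.replacediffheader(oldlen, len(newtext))
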
2309 2343 def getstrippoint(self, minlink):
2310 2344 """find the minimum rev that must be stripped to strip the linkrev
2311 2345
2312 2346 Returns a tuple containing the minimum rev and a set of all revs that
2313 2347 have linkrevs that will be broken by this strip.
2314 2348 """
2315 2349 brokenrevs = set()
2316 2350 strippoint = len(self)
2317 2351
2318 2352 heads = {}
2319 2353 futurelargelinkrevs = set()
2320 2354 for head in self.headrevs():
2321 2355 headlinkrev = self.linkrev(head)
2322 2356 heads[head] = headlinkrev
2323 2357 if headlinkrev >= minlink:
2324 2358 futurelargelinkrevs.add(headlinkrev)
2325 2359
2326 2360 # This algorithm involves walking down the rev graph, starting at the
2327 2361 # heads. Since the revs are topologically sorted according to linkrev,
2328 2362 # once all head linkrevs are below the minlink, we know there are
2329 2363 # no more revs that could have a linkrev greater than minlink.
2330 2364 # So we can stop walking.
2331 2365 while futurelargelinkrevs:
2332 2366 strippoint -= 1
2333 2367 linkrev = heads.pop(strippoint)
2334 2368
2335 2369 if linkrev < minlink:
2336 2370 brokenrevs.add(strippoint)
2337 2371 else:
2338 2372 futurelargelinkrevs.remove(linkrev)
2339 2373
2340 2374 for p in self.parentrevs(strippoint):
2341 2375 if p != nullrev:
2342 2376 plinkrev = self.linkrev(p)
2343 2377 heads[p] = plinkrev
2344 2378 if plinkrev >= minlink:
2345 2379 futurelargelinkrevs.add(plinkrev)
2346 2380
2347 2381 return strippoint, brokenrevs
2348 2382
2349 2383 def strip(self, minlink, transaction):
2350 2384 """truncate the revlog on the first revision with a linkrev >= minlink
2351 2385
2352 2386 This function is called when we're stripping revision minlink and
2353 2387 its descendants from the repository.
2354 2388
2355 2389 We have to remove all revisions with linkrev >= minlink, because
2356 2390 the equivalent changelog revisions will be renumbered after the
2357 2391 strip.
2358 2392
2359 2393 So we truncate the revlog on the first of these revisions, and
2360 2394 trust that the caller has saved the revisions that shouldn't be
2361 2395 removed and that it'll re-add them after this truncation.
2362 2396 """
2363 2397 if len(self) == 0:
2364 2398 return
2365 2399
2366 2400 rev, _ = self.getstrippoint(minlink)
2367 2401 if rev == len(self):
2368 2402 return
2369 2403
2370 2404 # first truncate the files on disk
2371 2405 end = self.start(rev)
2372 2406 if not self._inline:
2373 2407 transaction.add(self.datafile, end)
2374 2408 end = rev * self._io.size
2375 2409 else:
2376 2410 end += rev * self._io.size
2377 2411
2378 2412 transaction.add(self.indexfile, end)
2379 2413
2380 2414 # then reset internal state in memory to forget those revisions
2381 2415 self._cache = None
2382 2416 self._chaininfocache = {}
2383 2417 self._chunkclear()
2384 2418 for x in xrange(rev, len(self)):
2385 2419 del self.nodemap[self.node(x)]
2386 2420
2387 2421 del self.index[rev:-1]
2388 2422
2389 2423 def checksize(self):
2390 2424 expected = 0
2391 2425 if len(self):
2392 2426 expected = max(0, self.end(len(self) - 1))
2393 2427
2394 2428 try:
2395 2429 with self._datafp() as f:
2396 2430 f.seek(0, 2)
2397 2431 actual = f.tell()
2398 2432 dd = actual - expected
2399 2433 except IOError as inst:
2400 2434 if inst.errno != errno.ENOENT:
2401 2435 raise
2402 2436 dd = 0
2403 2437
2404 2438 try:
2405 2439 f = self.opener(self.indexfile)
2406 2440 f.seek(0, 2)
2407 2441 actual = f.tell()
2408 2442 f.close()
2409 2443 s = self._io.size
2410 2444 i = max(0, actual // s)
2411 2445 di = actual - (i * s)
2412 2446 if self._inline:
2413 2447 databytes = 0
2414 2448 for r in self:
2415 2449 databytes += max(0, self.length(r))
2416 2450 dd = 0
2417 2451 di = actual - len(self) * s - databytes
2418 2452 except IOError as inst:
2419 2453 if inst.errno != errno.ENOENT:
2420 2454 raise
2421 2455 di = 0
2422 2456
2423 2457 return (dd, di)
2424 2458
2425 2459 def files(self):
2426 2460 res = [self.indexfile]
2427 2461 if not self._inline:
2428 2462 res.append(self.datafile)
2429 2463 return res
2430 2464
2431 2465 DELTAREUSEALWAYS = 'always'
2432 2466 DELTAREUSESAMEREVS = 'samerevs'
2433 2467 DELTAREUSENEVER = 'never'
2434 2468
2435 2469 DELTAREUSEFULLADD = 'fulladd'
2436 2470
2437 2471 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2438 2472
2439 2473 def clone(self, tr, destrevlog, addrevisioncb=None,
2440 2474 deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
2441 2475 """Copy this revlog to another, possibly with format changes.
2442 2476
2443 2477 The destination revlog will contain the same revisions and nodes.
2444 2478 However, it may not be bit-for-bit identical due to e.g. delta encoding
2445 2479 differences.
2446 2480
2447 2481 The ``deltareuse`` argument controls how deltas from the existing revlog
2448 2482 are preserved in the destination revlog. The argument can have the
2449 2483 following values:
2450 2484
2451 2485 DELTAREUSEALWAYS
2452 2486 Deltas will always be reused (if possible), even if the destination
2453 2487 revlog would not select the same revisions for the delta. This is the
2454 2488 fastest mode of operation.
2455 2489 DELTAREUSESAMEREVS
2456 2490 Deltas will be reused if the destination revlog would pick the same
2457 2491 revisions for the delta. This mode strikes a balance between speed
2458 2492 and optimization.
2459 2493 DELTAREUSENEVER
2460 2494 Deltas will never be reused. This is the slowest mode of execution.
2461 2495 This mode can be used to recompute deltas (e.g. if the diff/delta
2462 2496 algorithm changes).
2463 2497
2464 2498 Delta computation can be slow, so the choice of delta reuse policy can
2465 2499 significantly affect run time.
2466 2500
2467 2501 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2468 2502 two extremes. Deltas will be reused if they are appropriate. But when a
2469 2503 better delta base is available, it will be used instead. This means if you
2470 2504 are converting a non-generaldelta revlog to a generaldelta revlog,
2471 2505 deltas will be recomputed if the delta's parent isn't a parent of the
2472 2506 revision.
2473 2507
2474 2508 In addition to the delta policy, the ``aggressivemergedeltas`` argument
2475 2509 controls whether to compute deltas against both parents for merges.
2476 2510 If not set, the destination revlog's current setting is used.
2477 2511 """
2478 2512 if deltareuse not in self.DELTAREUSEALL:
2479 2513 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2480 2514
2481 2515 if len(destrevlog):
2482 2516 raise ValueError(_('destination revlog is not empty'))
2483 2517
2484 2518 if getattr(self, 'filteredrevs', None):
2485 2519 raise ValueError(_('source revlog has filtered revisions'))
2486 2520 if getattr(destrevlog, 'filteredrevs', None):
2487 2521 raise ValueError(_('destination revlog has filtered revisions'))
2488 2522
2489 2523 # lazydeltabase controls whether to reuse a cached delta, if possible.
2490 2524 oldlazydeltabase = destrevlog._lazydeltabase
2491 2525 oldamd = destrevlog._aggressivemergedeltas
2492 2526
2493 2527 try:
2494 2528 if deltareuse == self.DELTAREUSEALWAYS:
2495 2529 destrevlog._lazydeltabase = True
2496 2530 elif deltareuse == self.DELTAREUSESAMEREVS:
2497 2531 destrevlog._lazydeltabase = False
2498 2532
2499 2533 destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
2500 2534
2501 2535 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2502 2536 self.DELTAREUSESAMEREVS)
2503 2537
2504 2538 deltacomputer = _deltacomputer(destrevlog)
2505 2539 index = self.index
2506 2540 for rev in self:
2507 2541 entry = index[rev]
2508 2542
2509 2543 # Some classes override linkrev to take filtered revs into
2510 2544 # account. Use raw entry from index.
2511 2545 flags = entry[0] & 0xffff
2512 2546 linkrev = entry[4]
2513 2547 p1 = index[entry[5]][7]
2514 2548 p2 = index[entry[6]][7]
2515 2549 node = entry[7]
2516 2550
2517 2551 # (Possibly) reuse the delta from the revlog if allowed and
2518 2552 # the revlog chunk is a delta.
2519 2553 cachedelta = None
2520 2554 rawtext = None
2521 2555 if populatecachedelta:
2522 2556 dp = self.deltaparent(rev)
2523 2557 if dp != nullrev:
2524 2558 cachedelta = (dp, bytes(self._chunk(rev)))
2525 2559
2526 2560 if not cachedelta:
2527 2561 rawtext = self.revision(rev, raw=True)
2528 2562
2529 2563
2530 2564 if deltareuse == self.DELTAREUSEFULLADD:
2531 2565 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2532 2566 cachedelta=cachedelta,
2533 2567 node=node, flags=flags,
2534 2568 deltacomputer=deltacomputer)
2535 2569 else:
2536 2570 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2537 2571 checkambig=False)
2538 2572 dfh = None
2539 2573 if not destrevlog._inline:
2540 2574 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2541 2575 try:
2542 2576 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2543 2577 p2, flags, cachedelta, ifh, dfh,
2544 2578 deltacomputer=deltacomputer)
2545 2579 finally:
2546 2580 if dfh:
2547 2581 dfh.close()
2548 2582 ifh.close()
2549 2583
2550 2584 if addrevisioncb:
2551 2585 addrevisioncb(self, rev, node)
2552 2586 finally:
2553 2587 destrevlog._lazydeltabase = oldlazydeltabase
2554 2588 destrevlog._aggressivemergedeltas = oldamd
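
A hedged usage sketch: recomputing every delta while copying into an empty
destination revlog. ``src``, ``dst`` and ``tr`` are assumed to be a source
revlog, an empty destination revlog and an open transaction:

    src.clone(tr, dst,
              deltareuse=src.DELTAREUSENEVER,
              aggressivemergedeltas=True)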