revlog: use proper version comparison during verify...
Gregory Szorc
r39881:e6d3d39c default
@@ -1,2609 +1,2609 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    stringutil,
)

# Blanket usage of all the names to prevent pyflakes "unused import"
# complaints. We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be a buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m
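
# A minimal round-trip sketch of the metadata envelope handled by
# packmeta()/parsemeta() above ('copy' and 'copyrev' are typical keys used
# by copy tracking; the concrete values here are illustrative):
#
#   meta = {'copy': 'a.txt', 'copyrev': '0' * 40}
#   blob = packmeta(meta, 'file contents\n')
#   # blob == '\1\ncopy: a.txt\ncopyrev: 000...0\n\1\nfile contents\n'
#   parsed, size = parsemeta(blob)
#   assert parsed == meta
#   assert blob[size:] == 'file contents\n'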

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, the "rawtext" returned by "write" should be used
      to generate the hash. Usually, "write" and "read" return different
      booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
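
# A minimal registration sketch in the 3-tuple shape required above; the
# flag is real (REVIDX_EXTSTORED), but the no-op transforms are placeholder
# assumptions, not any extension's actual processors:
#
#   def _extread(self, rawtext):
#       return rawtext, False     # (text, usable-for-hash-check)
#   def _extwrite(self, text):
#       return text, False
#   def _extraw(self, rawtext):
#       return False
#   addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))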

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
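
# Round-trip sketch of the packed first index field: the high 48 bits hold
# the offset, the low 16 bits the flags (REVIDX_ISCENSORED is used here only
# as a convenient known flag):
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   assert getoffset(packed) == 4096
#   assert gettype(packed) == REVIDX_ISCENSORED
#   offset_type(4096, 0x1)  # raises ValueError: bit not in REVIDX_KNOWN_FLAGS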

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # copying a hash object is faster than creating a new one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
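
# A minimal sketch of the properties guaranteed above: the parents are
# ordered before hashing, and a single-parent revision hashes as if p2 were
# nullid:
#
#   p1, p2 = b'\x11' * 20, b'\x22' * 20
#   assert hash(b'data', p1, p2) == hash(b'data', p2, p1)
#   assert (hash(b'data', p1, nullid) ==
#           hashlib.sha1(nullid + p1 + b'data').digest())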

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True, frozen=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    linknode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
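
# Each v0 index entry is a fixed 76-byte record (four big-endian longs plus
# three 20-byte nodeids); a minimal packing sketch:
#
#   assert indexformatv0.size == 4 * 4 + 3 * 20
#   entry = indexformatv0_pack(0, 0, 0, -1, nullid, nullid, b'\x99' * 20)
#   offset, clen, base, link, p1, p2, node = indexformatv0_unpack(entry)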

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
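
# Each "ng" entry is a fixed 64-byte record: 8 bytes of packed offset/flags
# (see offset_type() above), six 4-byte ints, a 20-byte nodeid, and 12 bytes
# of padding. For rev 0, packentry() below splices the 4-byte version header
# over the start of the entry. A minimal sketch:
#
#   assert indexformatng.size == 8 + 6 * 4 + 20 + 12   # == 64
#   entry = indexformatng_pack(offset_type(0, 0), 10, 20, 0, 0, -1, -1,
#                              b'\xaa' * 20)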

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make a copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
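        # A positive integer is a power of two iff clearing its lowest set
        # bit, n & (n - 1), leaves zero.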
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
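
        # The version field packs the format number into the low 16 bits and
        # feature flags into the high bits, e.g.:
        #
        #   v = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
        #   v & 0xFFFF  == REVLOGV1                              # format
        #   v & ~0xFFFF == FLAG_INLINE_DATA | FLAG_GENERALDELTA  # flags
        #
        # This is why code comparing revlog versions (e.g. during verify, per
        # this changeset's subject) must mask with 0xFFFF rather than compare
        # raw self.version values.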

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
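
    # A minimal usage sketch, assuming a non-inline revlog ``rl`` and a valid
    # ``rev`` (for inline revlogs the data is interleaved with index entries,
    # so a plain seek is not sufficient):
    #
    #   with rl._datareadfp() as fp:
    #       fp.seek(rl.start(rev))
    #       chunk = fp.read(rl.length(rev))   # compressed chunk for rev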

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)
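
    # For example, in a five-revision revlog revs() yields 0, 1, 2, 3, 4,
    # while revs(3, 1) yields 3, 2, 1: a start greater than stop produces a
    # reversed, stop-inclusive range.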

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
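
    # Illustrative example (hypothetical revisions): with generaldelta, if
    # rev 5 stores a delta against rev 3 and rev 3 against its base rev 0,
    # then _deltachain(5) returns ([0, 3, 5], False), while
    # _deltachain(5, stoprev=3) returns ([5], True) because the walk stops
    # before including ``stoprev``.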
798
798
799 def ancestors(self, revs, stoprev=0, inclusive=False):
799 def ancestors(self, revs, stoprev=0, inclusive=False):
800 """Generate the ancestors of 'revs' in reverse topological order.
800 """Generate the ancestors of 'revs' in reverse topological order.
801 Does not generate revs lower than stoprev.
801 Does not generate revs lower than stoprev.
802
802
803 See the documentation for ancestor.lazyancestors for more details."""
803 See the documentation for ancestor.lazyancestors for more details."""
804
804
805 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
805 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
806 inclusive=inclusive)
806 inclusive=inclusive)
807
807
808 def descendants(self, revs):
808 def descendants(self, revs):
809 """Generate the descendants of 'revs' in revision order.
809 """Generate the descendants of 'revs' in revision order.
810
810
811 Yield a sequence of revision numbers starting with a child of
811 Yield a sequence of revision numbers starting with a child of
812 some rev in revs, i.e., each revision is *not* considered a
812 some rev in revs, i.e., each revision is *not* considered a
813 descendant of itself. Results are ordered by revision number (a
813 descendant of itself. Results are ordered by revision number (a
814 topological sort)."""
814 topological sort)."""
815 first = min(revs)
815 first = min(revs)
816 if first == nullrev:
816 if first == nullrev:
817 for i in self:
817 for i in self:
818 yield i
818 yield i
819 return
819 return
820
820
821 seen = set(revs)
821 seen = set(revs)
822 for i in self.revs(start=first + 1):
822 for i in self.revs(start=first + 1):
823 for x in self.parentrevs(i):
823 for x in self.parentrevs(i):
824 if x != nullrev and x in seen:
824 if x != nullrev and x in seen:
825 seen.add(i)
825 seen.add(i)
826 yield i
826 yield i
827 break
827 break
828
828
829 def findcommonmissing(self, common=None, heads=None):
829 def findcommonmissing(self, common=None, heads=None):
830 """Return a tuple of the ancestors of common and the ancestors of heads
830 """Return a tuple of the ancestors of common and the ancestors of heads
831 that are not ancestors of common. In revset terminology, we return the
831 that are not ancestors of common. In revset terminology, we return the
832 tuple:
832 tuple:
833
833
834 ::common, (::heads) - (::common)
834 ::common, (::heads) - (::common)
835
835
836 The list is sorted by revision number, meaning it is
836 The list is sorted by revision number, meaning it is
837 topologically sorted.
837 topologically sorted.
838
838
839 'heads' and 'common' are both lists of node IDs. If heads is
839 'heads' and 'common' are both lists of node IDs. If heads is
840 not supplied, uses all of the revlog's heads. If common is not
840 not supplied, uses all of the revlog's heads. If common is not
841 supplied, uses nullid."""
841 supplied, uses nullid."""
842 if common is None:
842 if common is None:
843 common = [nullid]
843 common = [nullid]
844 if heads is None:
844 if heads is None:
845 heads = self.heads()
845 heads = self.heads()
846
846
847 common = [self.rev(n) for n in common]
847 common = [self.rev(n) for n in common]
848 heads = [self.rev(n) for n in heads]
848 heads = [self.rev(n) for n in heads]
849
849
850 # we want the ancestors, but inclusive
850 # we want the ancestors, but inclusive
851 class lazyset(object):
851 class lazyset(object):
852 def __init__(self, lazyvalues):
852 def __init__(self, lazyvalues):
853 self.addedvalues = set()
853 self.addedvalues = set()
854 self.lazyvalues = lazyvalues
854 self.lazyvalues = lazyvalues
855
855
856 def __contains__(self, value):
856 def __contains__(self, value):
857 return value in self.addedvalues or value in self.lazyvalues
857 return value in self.addedvalues or value in self.lazyvalues
858
858
859 def __iter__(self):
859 def __iter__(self):
860 added = self.addedvalues
860 added = self.addedvalues
861 for r in added:
861 for r in added:
862 yield r
862 yield r
863 for r in self.lazyvalues:
863 for r in self.lazyvalues:
864 if not r in added:
864 if not r in added:
865 yield r
865 yield r
866
866
867 def add(self, value):
867 def add(self, value):
868 self.addedvalues.add(value)
868 self.addedvalues.add(value)
869
869
870 def update(self, values):
870 def update(self, values):
871 self.addedvalues.update(values)
871 self.addedvalues.update(values)
872
872
873 has = lazyset(self.ancestors(common))
873 has = lazyset(self.ancestors(common))
874 has.add(nullrev)
874 has.add(nullrev)
875 has.update(common)
875 has.update(common)
876
876
877 # take all ancestors from heads that aren't in has
877 # take all ancestors from heads that aren't in has
878 missing = set()
878 missing = set()
879 visit = collections.deque(r for r in heads if r not in has)
879 visit = collections.deque(r for r in heads if r not in has)
880 while visit:
880 while visit:
881 r = visit.popleft()
881 r = visit.popleft()
882 if r in missing:
882 if r in missing:
883 continue
883 continue
884 else:
884 else:
885 missing.add(r)
885 missing.add(r)
886 for p in self.parentrevs(r):
886 for p in self.parentrevs(r):
887 if p not in has:
887 if p not in has:
888 visit.append(p)
888 visit.append(p)
889 missing = list(missing)
889 missing = list(missing)
890 missing.sort()
890 missing.sort()
891 return has, [self.node(miss) for miss in missing]
891 return has, [self.node(miss) for miss in missing]
892
892
893 def incrementalmissingrevs(self, common=None):
893 def incrementalmissingrevs(self, common=None):
894 """Return an object that can be used to incrementally compute the
894 """Return an object that can be used to incrementally compute the
895 revision numbers of the ancestors of arbitrary sets that are not
895 revision numbers of the ancestors of arbitrary sets that are not
896 ancestors of common. This is an ancestor.incrementalmissingancestors
896 ancestors of common. This is an ancestor.incrementalmissingancestors
897 object.
897 object.
898
898
899 'common' is a list of revision numbers. If common is not supplied, uses
899 'common' is a list of revision numbers. If common is not supplied, uses
900 nullrev.
900 nullrev.
901 """
901 """
902 if common is None:
902 if common is None:
903 common = [nullrev]
903 common = [nullrev]
904
904
905 return ancestor.incrementalmissingancestors(self.parentrevs, common)
905 return ancestor.incrementalmissingancestors(self.parentrevs, common)
906
906
907 def findmissingrevs(self, common=None, heads=None):
907 def findmissingrevs(self, common=None, heads=None):
908 """Return the revision numbers of the ancestors of heads that
908 """Return the revision numbers of the ancestors of heads that
909 are not ancestors of common.
909 are not ancestors of common.
910
910
911 More specifically, return a list of revision numbers corresponding to
911 More specifically, return a list of revision numbers corresponding to
912 nodes N such that every N satisfies the following constraints:
912 nodes N such that every N satisfies the following constraints:
913
913
914 1. N is an ancestor of some node in 'heads'
914 1. N is an ancestor of some node in 'heads'
915 2. N is not an ancestor of any node in 'common'
915 2. N is not an ancestor of any node in 'common'
916
916
917 The list is sorted by revision number, meaning it is
917 The list is sorted by revision number, meaning it is
918 topologically sorted.
918 topologically sorted.
919
919
920 'heads' and 'common' are both lists of revision numbers. If heads is
920 'heads' and 'common' are both lists of revision numbers. If heads is
921 not supplied, uses all of the revlog's heads. If common is not
921 not supplied, uses all of the revlog's heads. If common is not
922 supplied, uses nullid."""
922 supplied, uses nullid."""
923 if common is None:
923 if common is None:
924 common = [nullrev]
924 common = [nullrev]
925 if heads is None:
925 if heads is None:
926 heads = self.headrevs()
926 heads = self.headrevs()
927
927
928 inc = self.incrementalmissingrevs(common=common)
928 inc = self.incrementalmissingrevs(common=common)
929 return inc.missingancestors(heads)
929 return inc.missingancestors(heads)
930
930
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

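    # Sketch (illustrative, not part of the original module): findmissing()
    # is the node-level twin of findmissingrevs(); both delegate to
    # incrementalmissingrevs().
    #
    #   nodes = rl.findmissing(common=[c_node], heads=[h_node])
    #   revs = rl.findmissingrevs(common=[rl.rev(c_node)],
    #                             heads=[rl.rev(h_node)])
    #   assert nodes == [rl.node(r) for r in revs]
    #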
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

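    # Sketch of nodesbetween() on a tiny linear history (illustrative, not
    # part of the original module). With revisions 0 -> 1 -> 2:
    #
    #   nodes, outroots, outheads = rl.nodesbetween([rl.node(0)],
    #                                               [rl.node(2)])
    #   # nodes == [node(0), node(1), node(2)]
    #   # outroots == [node(0)], outheads == [node(2)]
    #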
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no rev is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

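    # Sketch of the marking pass above (illustrative, not part of the
    # original module): each rev first marks itself (ishead[r] = 1), then
    # clears its parents' slots; the extra trailing slot absorbs writes for
    # nullrev (-1). For a DAG 0 -> 1 -> 2 plus 0 -> 3, the surviving marks
    # are revs 2 and 3, so _headrevs() returns [2, 3].
    #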
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

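    # Usage sketch (illustrative, not part of the original module):
    #
    #   rl.heads()                       # all childless nodes
    #   rl.heads(start=rl.node(2))       # heads that descend from rev 2
    #   rl.heads(stop=[rl.node(5)])      # rev 5 treated as childless
    #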
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

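    # Why the early returns above are safe (explanatory note, not part of
    # the original module): revision numbers are assigned in topological
    # order, so an ancestor always has a number <= its descendant; a > b
    # therefore rules out ancestry without computing anything. When a <= b,
    # a is an ancestor of b exactly when a is one of the heads of the common
    # ancestors of (a, b), since a is an ancestor of itself.
    #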
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

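    # Usage sketch (illustrative, not part of the original module): as the
    # docstring says, lookup() accepts several spellings for the same
    # revision.
    #
    #   n = rl.lookup(0)              # revision number
    #   n = rl.lookup('0')            # str(revision number)
    #   n = rl.lookup(hex(n)[:8])     # unambiguous hex prefix
    #   n = rl.lookup(n)              # full binary nodeid
    #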
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

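    # Usage sketch (illustrative, not part of the original module):
    #
    #   prefix = rl.shortest(some_node, minlength=4)
    #   assert rl.lookup(prefix) == some_node
    #
    # The disambiguate() pass only guards against the reserved all-'f'
    # working-directory id; ambiguity among stored nodes is already handled
    # by _partialmatch() and the C index's shortest().
    #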
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

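    # Worked example of the alignment above (illustrative, assuming a
    # 64 KiB _chunkcachesize; not part of the original module):
    #
    #   offset, length = 70000, 1000        # requested window
    #   cachesize = 65536
    #   realoffset = 70000 & ~65535         # -> 65536, rounded down
    #   reallength = ((70000 + 1000 + 65536) & ~65535) - 65536
    #                                       # -> 131072 - 65536 = 65536
    #
    # so one aligned 64 KiB block is read and cached, and a buffer into it
    # is returned for the 1000 requested bytes.
    #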
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is used, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

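    # Note on the inline adjustment above (explanatory, not part of the
    # original module): in an inline revlog, each revision's data chunk is
    # preceded by its index entry, so rev r's data starts after (r + 1)
    # index entries of self._io.size bytes each. For example, with a
    # 64-byte index entry, rev 2's chunk begins at start(2) + 3 * 64 bytes
    # into the combined file.
    #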
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

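    # Explanatory sketch (not part of the original module): a revision is a
    # snapshot when its delta chain, followed upward, never passes through
    # a delta against one of its own parents.
    #
    #   deltaparent(r) == nullrev      -> full snapshot
    #   delta against p1 or p2         -> regular delta, not a snapshot
    #   delta against a non-parent     -> snapshot iff its delta parent is
    #                                     itself a snapshot (the recursion)
    #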
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

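    # Note on the fast path above (explanatory, not part of the original
    # module): when rev2 is already stored as a delta against rev1, the
    # stored chunk *is* the binary delta, so it can be returned after
    # decompression without computing anything; otherwise a fresh binary
    # diff of the two raw texts is produced with mdiff.textdiff().
    #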
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

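    # Sketch of the reconstruction above (explanatory, not part of the
    # original module): for a delta chain [base, d1, d2, rev],
    #
    #   bins = self._chunks([base, d1, d2, rev])
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # i.e. the first chunk is the full base text and each later chunk is a
    # binary patch applied on top. When the cached revision sits inside the
    # chain, _deltachain() stops there and the cached rawtext replaces the
    # base, so only the remaining patches are applied.
    #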
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

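    # Shape of a flag processor (illustrative sketch, not part of the
    # original module): each registered processor is a 3-tuple of
    # (readtransform, writetransform, rawtransform) callables; the first
    # two return (newtext, vhash), while rawtransform returns only vhash.
    # The names below are hypothetical, and registration normally happens
    # elsewhere rather than by direct dict assignment:
    #
    #   def _ellipsisread(rl, text):
    #       return text, True
    #
    #   def _ellipsiswrite(rl, text):
    #       return text, True
    #
    #   def _ellipsisraw(rl, text):
    #       return True
    #
    #   # self._flagprocessors[REVIDX_ELLIPSIS] = (
    #   #     _ellipsisread, _ellipsiswrite, _ellipsisraw)
    #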
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                                        % (self.indexfile,
                                           pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

1806 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1806 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1807 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1807 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1808 """add a revision to the log
1808 """add a revision to the log
1809
1809
1810 text - the revision data to add
1810 text - the revision data to add
1811 transaction - the transaction object used for rollback
1811 transaction - the transaction object used for rollback
1812 link - the linkrev data to add
1812 link - the linkrev data to add
1813 p1, p2 - the parent nodeids of the revision
1813 p1, p2 - the parent nodeids of the revision
1814 cachedelta - an optional precomputed delta
1814 cachedelta - an optional precomputed delta
1815 node - nodeid of revision; typically node is not specified, and it is
1815 node - nodeid of revision; typically node is not specified, and it is
1816 computed by default as hash(text, p1, p2), however subclasses might
1816 computed by default as hash(text, p1, p2), however subclasses might
1817 use different hashing method (and override checkhash() in such case)
1817 use different hashing method (and override checkhash() in such case)
1818 flags - the known flags to set on the revision
1818 flags - the known flags to set on the revision
1819 deltacomputer - an optional deltacomputer instance shared between
1819 deltacomputer - an optional deltacomputer instance shared between
1820 multiple calls
1820 multiple calls
1821 """
1821 """
1822 if link == nullrev:
1822 if link == nullrev:
1823 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1823 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1824 % self.indexfile)
1824 % self.indexfile)
1825
1825
1826 if flags:
1826 if flags:
1827 node = node or self.hash(text, p1, p2)
1827 node = node or self.hash(text, p1, p2)
1828
1828
1829 rawtext, validatehash = self._processflags(text, flags, 'write')
1829 rawtext, validatehash = self._processflags(text, flags, 'write')
1830
1830
1831 # If the flag processor modifies the revision data, ignore any provided
1831 # If the flag processor modifies the revision data, ignore any provided
1832 # cachedelta.
1832 # cachedelta.
1833 if rawtext != text:
1833 if rawtext != text:
1834 cachedelta = None
1834 cachedelta = None
1835
1835
1836 if len(rawtext) > _maxentrysize:
1836 if len(rawtext) > _maxentrysize:
1837 raise error.RevlogError(
1837 raise error.RevlogError(
1838 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1838 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1839 % (self.indexfile, len(rawtext)))
1839 % (self.indexfile, len(rawtext)))
1840
1840
1841 node = node or self.hash(rawtext, p1, p2)
1841 node = node or self.hash(rawtext, p1, p2)
1842 if node in self.nodemap:
1842 if node in self.nodemap:
1843 return node
1843 return node
1844
1844
1845 if validatehash:
1845 if validatehash:
1846 self.checkhash(rawtext, node, p1=p1, p2=p2)
1846 self.checkhash(rawtext, node, p1=p1, p2=p2)
1847
1847
1848 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1848 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1849 flags, cachedelta=cachedelta,
1849 flags, cachedelta=cachedelta,
1850 deltacomputer=deltacomputer)
1850 deltacomputer=deltacomputer)
1851
1851
1852 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1852 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1853 cachedelta=None, deltacomputer=None):
1853 cachedelta=None, deltacomputer=None):
1854 """add a raw revision with known flags, node and parents
1854 """add a raw revision with known flags, node and parents
1855 useful when reusing a revision not stored in this revlog (ex: received
1855 useful when reusing a revision not stored in this revlog (ex: received
1856 over wire, or read from an external bundle).
1856 over wire, or read from an external bundle).
1857 """
1857 """
1858 dfh = None
1858 dfh = None
1859 if not self._inline:
1859 if not self._inline:
1860 dfh = self._datafp("a+")
1860 dfh = self._datafp("a+")
1861 ifh = self._indexfp("a+")
1861 ifh = self._indexfp("a+")
1862 try:
1862 try:
1863 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1863 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1864 flags, cachedelta, ifh, dfh,
1864 flags, cachedelta, ifh, dfh,
1865 deltacomputer=deltacomputer)
1865 deltacomputer=deltacomputer)
1866 finally:
1866 finally:
1867 if dfh:
1867 if dfh:
1868 dfh.close()
1868 dfh.close()
1869 ifh.close()
1869 ifh.close()
1870
1870
1871 def compress(self, data):
1871 def compress(self, data):
1872 """Generate a possibly-compressed representation of data."""
1872 """Generate a possibly-compressed representation of data."""
1873 if not data:
1873 if not data:
1874 return '', data
1874 return '', data
1875
1875
1876 compressed = self._compressor.compress(data)
1876 compressed = self._compressor.compress(data)
1877
1877
1878 if compressed:
1878 if compressed:
1879 # The revlog compressor added the header in the returned data.
1879 # The revlog compressor added the header in the returned data.
1880 return '', compressed
1880 return '', compressed
1881
1881
1882 if data[0:1] == '\0':
1882 if data[0:1] == '\0':
1883 return '', data
1883 return '', data
1884 return 'u', data
1884 return 'u', data
1885
1885
1886 def decompress(self, data):
1886 def decompress(self, data):
1887 """Decompress a revlog chunk.
1887 """Decompress a revlog chunk.
1888
1888
1889 The chunk is expected to begin with a header identifying the
1889 The chunk is expected to begin with a header identifying the
1890 format type so it can be routed to an appropriate decompressor.
1890 format type so it can be routed to an appropriate decompressor.
1891 """
1891 """
1892 if not data:
1892 if not data:
1893 return data
1893 return data
1894
1894
1895 # Revlogs are read much more frequently than they are written and many
1895 # Revlogs are read much more frequently than they are written and many
1896 # chunks only take microseconds to decompress, so performance is
1896 # chunks only take microseconds to decompress, so performance is
1897 # important here.
1897 # important here.
1898 #
1898 #
1899 # We can make a few assumptions about revlogs:
1899 # We can make a few assumptions about revlogs:
1900 #
1900 #
1901 # 1) the majority of chunks will be compressed (as opposed to inline
1901 # 1) the majority of chunks will be compressed (as opposed to inline
1902 # raw data).
1902 # raw data).
1903 # 2) decompressing *any* data will likely by at least 10x slower than
1903 # 2) decompressing *any* data will likely by at least 10x slower than
1904 # returning raw inline data.
1904 # returning raw inline data.
1905 # 3) we want to prioritize common and officially supported compression
1905 # 3) we want to prioritize common and officially supported compression
1906 # engines
1906 # engines
1907 #
1907 #
1908 # It follows that we want to optimize for "decompress compressed data
1908 # It follows that we want to optimize for "decompress compressed data
1909 # when encoded with common and officially supported compression engines"
1909 # when encoded with common and officially supported compression engines"
1910 # case over "raw data" and "data encoded by less common or non-official
1910 # case over "raw data" and "data encoded by less common or non-official
1911 # compression engines." That is why we have the inline lookup first
1911 # compression engines." That is why we have the inline lookup first
1912 # followed by the compengines lookup.
1912 # followed by the compengines lookup.
1913 #
1913 #
1914 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1914 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1915 # compressed chunks. And this matters for changelog and manifest reads.
1915 # compressed chunks. And this matters for changelog and manifest reads.
1916 t = data[0:1]
1916 t = data[0:1]
1917
1917
1918 if t == 'x':
1918 if t == 'x':
1919 try:
1919 try:
1920 return _zlibdecompress(data)
1920 return _zlibdecompress(data)
1921 except zlib.error as e:
1921 except zlib.error as e:
1922 raise error.RevlogError(_('revlog decompress error: %s') %
1922 raise error.RevlogError(_('revlog decompress error: %s') %
1923 stringutil.forcebytestr(e))
1923 stringutil.forcebytestr(e))
1924 # '\0' is more common than 'u' so it goes first.
1924 # '\0' is more common than 'u' so it goes first.
1925 elif t == '\0':
1925 elif t == '\0':
1926 return data
1926 return data
1927 elif t == 'u':
1927 elif t == 'u':
1928 return util.buffer(data, 1)
1928 return util.buffer(data, 1)
1929
1929
1930 try:
1930 try:
1931 compressor = self._decompressors[t]
1931 compressor = self._decompressors[t]
1932 except KeyError:
1932 except KeyError:
1933 try:
1933 try:
1934 engine = util.compengines.forrevlogheader(t)
1934 engine = util.compengines.forrevlogheader(t)
1935 compressor = engine.revlogcompressor()
1935 compressor = engine.revlogcompressor()
1936 self._decompressors[t] = compressor
1936 self._decompressors[t] = compressor
1937 except KeyError:
1937 except KeyError:
1938 raise error.RevlogError(_('unknown compression type %r') % t)
1938 raise error.RevlogError(_('unknown compression type %r') % t)
1939
1939
1940 return compressor.decompress(data)
1940 return compressor.decompress(data)
1941
1941
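    # Illustration (not part of revlog.py): the one-byte chunk header
    # convention used by compress()/decompress() above, as a minimal
    # standalone sketch. It assumes only zlib ('x') plus the built-in
    # markers; a real revlog can also route other header bytes to pluggable
    # compression engines via util.compengines.
    #
    #   import zlib
    #
    #   def toy_compress(data):
    #       if not data:
    #           return data
    #       compressed = zlib.compress(data)
    #       if len(compressed) < len(data):
    #           return compressed            # zlib output starts with 'x'
    #       if data.startswith(b'\0'):
    #           return data                  # '\0' chunks are stored as-is
    #       return b'u' + data               # explicit "uncompressed" marker
    #
    #   def toy_decompress(chunk):
    #       header = chunk[0:1]
    #       if header == b'x':
    #           return zlib.decompress(chunk)
    #       if header == b'u':
    #           return chunk[1:]
    #       return chunk                     # empty or '\0'-prefixed raw data
    #
    #   payload = b'hello world' * 100
    #   assert toy_decompress(toy_compress(payload)) == payload
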
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

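    # Illustration (not part of revlog.py): the seek-before-write pattern
    # described in the comment above, as a minimal sketch. The file name is
    # hypothetical; the point is that after reading from an "a+b" handle,
    # an explicit seek(0, SEEK_END) makes the append position unambiguous
    # on platforms where "a+" behavior is inconsistent.
    #
    #   import os
    #
    #   with open('example.dat', 'a+b') as fh:
    #       fh.seek(0)
    #       existing = fh.read()             # handle now positioned mid-file
    #       fh.seek(0, os.SEEK_END)          # reposition before appending
    #       fh.write(b'new entry\n')
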
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

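    # Illustration (not part of revlog.py): what a full-replacement delta
    # looks like to _peek_iscensored(), as a minimal sketch with made-up
    # sizes. The 12-byte ">lll" header encodes (start, end, newlength); a
    # replacement of the whole base text is (0, oldlen, newlen), which is
    # exactly what mdiff.replacediffheader() packs.
    #
    #   import struct
    #
    #   oldlen = 20                          # rawsize of the delta base
    #   newtext = b'\1\ncensored:gone\1\n' + b' ' * 3
    #   delta = struct.pack(">lll", 0, oldlen, len(newtext)) + newtext
    #
    #   hlen = struct.calcsize(">lll")
    #   assert delta[:hlen] == struct.pack(">lll", 0, oldlen,
    #                                      len(delta) - hlen)
    #   assert delta[hlen:hlen + 11] == b'\1\ncensored:'
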
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

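    # Illustration (not part of revlog.py): getstrippoint() on a toy revlog.
    # A minimal sketch assuming a linear history whose linkrevs happen to be
    # [0, 1, 4, 2, 5]. Stripping minlink=4 must truncate at rev 2 (the first
    # rev with linkrev >= 4), and rev 3 (linkrev 2) is reported as broken:
    # it gets stripped even though its own linkrev survives.
    #
    #   linkrevs = [0, 1, 4, 2, 5]
    #   minlink = 4
    #   strippoint = next(r for r, lr in enumerate(linkrevs) if lr >= minlink)
    #   brokenrevs = {r for r in range(strippoint, len(linkrevs))
    #                 if linkrevs[r] < minlink}
    #   assert (strippoint, brokenrevs) == (2, {3})
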
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

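    # Illustration (not part of revlog.py): the two numbers checksize()
    # returns, with made-up figures for a non-inline revlog. dd is how far
    # the data file's real length deviates from the end of the last
    # revision; di is how many trailing bytes the index has beyond a whole
    # number of fixed-size entries.
    #
    #   entrysize = 64                   # v1 index entry size (self._io.size)
    #   indexbytes = 64 * 10 + 7         # actual .i length: 10 entries + junk
    #   di = indexbytes - (indexbytes // entrysize) * entrysize
    #   assert di == 7                   # "index contains 7 extra bytes"
    #
    #   expected = 4096                  # self.end(len(self) - 1)
    #   actualdata = 4100                # actual .d length
    #   dd = actualdata - expected
    #   assert dd == 4                   # "data length off by 4 bytes"
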
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisiondeltas(self, requests):
        frev = self.rev

        prevrev = None
        for request in requests:
            node = request.node
            rev = frev(node)

            if prevrev is None:
                prevrev = self.index[rev][5]

            # Requesting a full revision.
            if request.basenode == nullid:
                baserev = nullrev
            # Requesting an explicit revision.
            elif request.basenode is not None:
                baserev = frev(request.basenode)
            # Allowing us to choose.
            else:
                p1rev, p2rev = self.parentrevs(rev)
                deltaparentrev = self.deltaparent(rev)

                # Avoid sending full revisions when delta parent is null. Pick
                # prev in that case. It's tempting to pick p1 in this case, as
                # p1 will be smaller in the common case. However, computing a
                # delta against p1 may require resolving the raw text of p1,
                # which could be expensive. The revlog caches should have prev
                # cached, meaning less CPU for delta generation. There is
                # likely room to add a flag and/or config option to control this
                # behavior.
                if deltaparentrev == nullrev and self._storedeltachains:
                    baserev = prevrev

                # Revlog is configured to use full snapshot for a reason.
                # Stick to full snapshot.
                elif deltaparentrev == nullrev:
                    baserev = nullrev

                # Pick previous when we can't be sure the base is available
                # on consumer.
                elif deltaparentrev not in (p1rev, p2rev, prevrev):
                    baserev = prevrev
                else:
                    baserev = deltaparentrev

                if baserev != nullrev and not self.candelta(baserev, rev):
                    baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if self.iscensored(baserev) or self.iscensored(rev):
                try:
                    revision = self.revision(node, raw=True)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    baserevisionsize = self.rawsize(baserev)

            elif baserev == nullrev:
                revision = self.revision(node, raw=True)
            else:
                delta = self.revdiff(baserev, rev)

            extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0

            yield revlogrevisiondelta(
                node=node,
                p1node=request.p1node,
                p2node=request.p2node,
                linknode=request.linknode,
                basenode=self.node(baserev),
                flags=self.flags(rev) | extraflags,
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When it is None, the destination revlog's existing setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

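    # Illustration (not part of revlog.py): driving clone() to re-encode a
    # revlog. A hypothetical sketch: `src`, `dest` and `tr` stand for an
    # existing revlog, an empty destination revlog (possibly created with
    # different format flags) and an open transaction, none of which are
    # constructed here.
    #
    #   src.clone(tr, dest,
    #             deltareuse=src.DELTAREUSENEVER,  # recompute every delta
    #             deltabothparents=True)           # try both parents on merges
    #   assert len(dest) == len(src)
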
    def censorrevision(self, node, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        rev = self.rev(node)
        tombstone = packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(rev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Using two files instead of one makes it easy to rewrite entry-by-entry
        idxread = self.opener(self.indexfile, 'r')
        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
        if self.version & FLAG_INLINE_DATA:
            dataread, datawrite = idxread, idxwrite
        else:
            dataread = self.opener(self.datafile, 'r')
            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

        # Copy all revlog data up to the entry to be censored.
        offset = self.start(rev)

        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
            flags = gettype(offlags)
            if newdata:
                comp, uncomp, nflags = newdata
                flags |= nflags
            offlags = offset_type(newoffs, flags)
            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
            idxwrite.write(self._io.packentry(e, None, self.version, r))
            idxread.seek(self._io.size, 1)

        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
            """Write the given fulltext with the given data offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = self.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            dataread.seek(self.length(r), 1)
            return newcomp

        # Rewrite censored entry with (padded) tombstone data.
        pad = ' ' * (self.rawsize(rev) - len(tombstone))
        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

        # Rewrite all following filelog revisions fixing up offsets and deltas.
        for srev in pycompat.xrange(rev + 1, len(self)):
            if rev in self.parentrevs(srev):
                # Immediate children of censored node must be re-added as
                # fulltext.
                try:
                    revdata = self.revision(srev)
                except error.CensoredNodeError as e:
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = self.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen

        idxread.close()
        idxwrite.close()
        if dataread is not idxread:
            dataread.close()
            datawrite.close()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

-        if self.version != REVLOGV0:
-            if not state['revlogv1']:
-                yield revlogproblem(warning=_("warning: `%s' uses revlog "
-                                              "format 1") % self.indexfile)
-        elif state['revlogv1']:
-            yield revlogproblem(warning=_("warning: `%s' uses revlog "
-                                          "format 0") % self.indexfile)
+        version = self.version & 0xFFFF
+
+        # The verifier tells us what version revlog we should be.
+        if version != state['expectedversion']:
+            yield revlogproblem(
+                warning=_("warning: '%s' uses revlog format %d; expected %d") %
+                        (self.indexfile, version, state['expectedversion']))
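# Illustration (not part of the patch): why the new code masks with 0xFFFF.
# self.version packs feature flags into the high 16 bits and the format
# number into the low 16 bits, so comparing against a bare format constant
# only matches when no flags are set. A minimal sketch using the v1
# constants:
#
#   REVLOGV1 = 1
#   FLAG_INLINE_DATA = 1 << 16
#   version = REVLOGV1 | FLAG_INLINE_DATA    # an inline v1 revlog
#   assert version != REVLOGV1               # naive comparison misfires
#   assert version & 0xFFFF == REVLOGV1      # masked comparison is correct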
@@ -1,505 +1,506
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import (
13 from .node import (
14 nullid,
14 nullid,
15 short,
15 short,
16 )
16 )
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 pycompat,
20 pycompat,
21 revlog,
21 revlog,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25
25
26 def verify(repo):
26 def verify(repo):
27 with repo.lock():
27 with repo.lock():
28 return verifier(repo).verify()
28 return verifier(repo).verify()
29
29
30 def _normpath(f):
30 def _normpath(f):
31 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 # under hg < 2.4, convert didn't sanitize paths properly, so a
32 # converted repo may contain repeated slashes
32 # converted repo may contain repeated slashes
33 while '//' in f:
33 while '//' in f:
34 f = f.replace('//', '/')
34 f = f.replace('//', '/')
35 return f
35 return f
36
36
class verifier(object):
    # The match argument is always None in hg core, but e.g. the narrowhg
    # extension will pass in a matcher here.
    def __init__(self, repo, match=None):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = match or scmutil.matchall(repo)
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    def warn(self, msg):
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self.err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def checklog(self, obj, name, linkrev):
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

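    # Illustrative note (not part of the original file): checklog keeps the
    # coarse v0-vs-v1 warning for the changelog and manifest; this change
    # moves the per-file check into the storage layer, which now compares
    # the masked version against state['expectedversion'] (see the
    # revlog.py hunk above).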
    def checkentry(self, obj, i, node, seen, linkrevs, f):
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        pass
                self.warn(_(" (expected %s)") % " ".join
                          (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

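    # Illustrative note (not part of the original file): a linkrev ties a
    # filelog or manifest revision back to the changelog. For a filelog fl,
    # lr = fl.linkrev(fl.rev(node)) is expected to satisfy
    # 0 <= lr < len(repo.changelog) and to appear among the linkrevs
    # collected from the changesets that touch the file; checkentry above
    # reports a problem when either condition fails.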
    def verify(self):
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (self.revlogv1 and 1 or 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

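    # Illustrative note (not part of the original file): cl.read(n) returns
    # the parsed changelog entry, roughly
    #   (manifest node, user, (time, tz), files, description, extra),
    # which is why changes[0] feeds mflinkrevs and changes[3] feeds
    # filelinkrevs in _verifychangelog above.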
    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self.warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(_('crosschecking'), total=total)
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        state = {
-            'revlogv1': self.revlogv1,
+            # TODO this assumes revlog storage for changelog.
+            'expectedversion': self.repo.changelog.version & 0xFFFF
        }

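        # Illustrative note (not part of the original file): for a v1
        # changelog stored inline, repo.changelog.version is
        # REVLOGV1 | FLAG_INLINE_DATA == 0x10001, so the 0xFFFF mask above
        # records expectedversion = 1 rather than the flag-bearing header.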
        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(_('checking'), unit=_('files'),
                                   total=len(files))
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self.warn(_(" warning: revlog '%s' not in fncache!") %
                                  ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self.err(lr, _("empty or missing %s") % f)
            else:
                for problem in fl.verifyintegrity(state):
                    if problem.warning:
                        self.warn(problem.warning)
                    elif problem.error:
                        self.err(lr, problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            'problem instance does not set warning or error '
                            'attribute: %s' % problem.msg)

            seen = {}
            rp = None
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self.checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # Verify contents. 4 cases to care about:
                #
                # common: the most common case
                # rename: with a rename
                # meta: file content starts with b'\1\n', the metadata
                #       header defined in filelog.py, but without a rename
                # ext: content stored externally
                #
                # More formally, their differences are shown below:
                #
                #                       | common | rename | meta  | ext
                #  -------------------------------------------------------
                #  flags()              | 0      | 0      | 0     | not 0
                #  renamed()            | False  | True   | False | ?
                #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
                #
                # "rawtext" means the raw text stored in revlog data, which
                # could be retrieved by "revision(rev, raw=True)". "text"
                # mentioned below is "revision(rev, raw=False)".
                #
                # There are 3 different lengths stored physically:
                #  1. L1: rawsize, stored in revlog index
                #  2. L2: len(rawtext), stored in revlog data
                #  3. L3: len(text), stored in revlog data if flags==0, or
                #     possibly somewhere else if flags!=0
                #
                # L1 should be equal to L2. L3 could be different from them.
                # "text" may or may not affect commit hash depending on flag
                # processors (see revlog.addflagprocessor).
                #
                #              | common  | rename | meta  | ext
                # -------------------------------------------------
                # rawsize()    | L1      | L1     | L1    | L1
                # size()       | L1      | L2-LM  | L1(*) | L1 (?)
                # len(rawtext) | L2      | L2     | L2    | L2
                # len(text)    | L2      | L2     | L2    | L3
                # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
                #
                # LM:  length of metadata, depending on rawtext
                # (*): not ideal, see comment in filelog.size
                # (?): could be "- len(meta)" if the resolved content has
                #      rename metadata
                #
                # Checks needed to be done:
                #  1. length check: L1 == L2, in all cases.
                #  2. hash check: depending on flag processor, we may need to
                #     use either "text" (external), or "rawtext" (in revlog).
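                # Worked example (not part of the original file): for a
                # renamed file whose rawtext is
                #   b'\1\ncopy: a\ncopyrev: <40 hex digits>\n\1\nhello\n'
                # L1 == L2 == len(rawtext), while len(read()) == L2 - LM
                # because read() strips the metadata header; this is why
                # only the L1 == L2 comparison below is safe in all four
                # cases of the table above.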
                try:
                    skipflags = self.skipflags
                    if skipflags:
                        skipflags &= fl.flags(i)
                    if not skipflags:
                        fl.read(n) # side effect: read content and do checkhash
                        rp = fl.renamed(n)
                    # the "L1 == L2" check
                    l1 = fl.rawsize(i)
                    l2 = len(fl.revision(n, raw=True))
                    if l1 != l2:
                        self.err(lr, _("unpacked size is %s, %s expected") %
                                 (l2, l1), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s") % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("manifest refers to unknown revision %s") %
                             short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self.warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
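For context, a hypothetical driver that exercises this path end to end (illustrative only; it assumes a Mercurial checkout on the import path and a local repository in the current directory):

    # Run the verifier programmatically; warnings such as the new
    # "uses revlog format %d; expected %d" message go through ui.warn().
    from mercurial import hg, ui as uimod, verify

    ui = uimod.ui.load()
    repo = hg.repository(ui, '.')
    ret = verify.verify(repo)  # returns 1 when integrity errors were found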