revlog: drop LookupError alias (API)...
Gregory Szorc
r39811:97459247 default
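The change below removes the module-level alias "LookupError = error.LookupError" from revlog.py, so code that previously caught or raised revlog.LookupError must now name error.LookupError directly. A minimal sketch of the caller-side adaptation (the lookup helper is illustrative, not part of Mercurial's API):

from mercurial import error

def lookup(rl, node):
    # Before r39811 this could catch revlog.LookupError; the alias is
    # gone, so only the canonical error.LookupError name works.
    try:
        return rl.rev(node)
    except error.LookupError:
        return None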
@@ -1,2496 +1,2494
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import os
20 import os
21 import re
21 import re
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullhex,
29 nullhex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .revlogutils.constants import (
38 from .revlogutils.constants import (
39 FLAG_GENERALDELTA,
39 FLAG_GENERALDELTA,
40 FLAG_INLINE_DATA,
40 FLAG_INLINE_DATA,
41 REVIDX_DEFAULT_FLAGS,
41 REVIDX_DEFAULT_FLAGS,
42 REVIDX_ELLIPSIS,
42 REVIDX_ELLIPSIS,
43 REVIDX_EXTSTORED,
43 REVIDX_EXTSTORED,
44 REVIDX_FLAGS_ORDER,
44 REVIDX_FLAGS_ORDER,
45 REVIDX_ISCENSORED,
45 REVIDX_ISCENSORED,
46 REVIDX_KNOWN_FLAGS,
46 REVIDX_KNOWN_FLAGS,
47 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVIDX_RAWTEXT_CHANGING_FLAGS,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 )
56 )
57 from .thirdparty import (
57 from .thirdparty import (
58 attr,
58 attr,
59 )
59 )
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 stringutil,
75 stringutil,
76 )
76 )
77
77
78 # blanket usage of all the names to prevent pyflakes warnings
78 # blanket usage of all the names to prevent pyflakes warnings
79 # We need these names available in the module for extensions.
79 # We need these names available in the module for extensions.
80 REVLOGV0
80 REVLOGV0
81 REVLOGV1
81 REVLOGV1
82 REVLOGV2
82 REVLOGV2
83 FLAG_INLINE_DATA
83 FLAG_INLINE_DATA
84 FLAG_GENERALDELTA
84 FLAG_GENERALDELTA
85 REVLOG_DEFAULT_FLAGS
85 REVLOG_DEFAULT_FLAGS
86 REVLOG_DEFAULT_FORMAT
86 REVLOG_DEFAULT_FORMAT
87 REVLOG_DEFAULT_VERSION
87 REVLOG_DEFAULT_VERSION
88 REVLOGV1_FLAGS
88 REVLOGV1_FLAGS
89 REVLOGV2_FLAGS
89 REVLOGV2_FLAGS
90 REVIDX_ISCENSORED
90 REVIDX_ISCENSORED
91 REVIDX_ELLIPSIS
91 REVIDX_ELLIPSIS
92 REVIDX_EXTSTORED
92 REVIDX_EXTSTORED
93 REVIDX_DEFAULT_FLAGS
93 REVIDX_DEFAULT_FLAGS
94 REVIDX_FLAGS_ORDER
94 REVIDX_FLAGS_ORDER
95 REVIDX_KNOWN_FLAGS
95 REVIDX_KNOWN_FLAGS
96 REVIDX_RAWTEXT_CHANGING_FLAGS
96 REVIDX_RAWTEXT_CHANGING_FLAGS
97
97
98 parsers = policy.importmod(r'parsers')
98 parsers = policy.importmod(r'parsers')
99
99
100 # Aliased for performance.
100 # Aliased for performance.
101 _zlibdecompress = zlib.decompress
101 _zlibdecompress = zlib.decompress
102
102
103 # max size of revlog with inline data
103 # max size of revlog with inline data
104 _maxinline = 131072
104 _maxinline = 131072
105 _chunksize = 1048576
105 _chunksize = 1048576
106
106
107 LookupError = error.LookupError
108
109 # Store flag processors (cf. 'addflagprocessor()' to register)
107 # Store flag processors (cf. 'addflagprocessor()' to register)
110 _flagprocessors = {
108 _flagprocessors = {
111 REVIDX_ISCENSORED: None,
109 REVIDX_ISCENSORED: None,
112 }
110 }
113
111
114 # Flag processors for REVIDX_ELLIPSIS.
112 # Flag processors for REVIDX_ELLIPSIS.
115 def ellipsisreadprocessor(rl, text):
113 def ellipsisreadprocessor(rl, text):
116 return text, False
114 return text, False
117
115
118 def ellipsiswriteprocessor(rl, text):
116 def ellipsiswriteprocessor(rl, text):
119 return text, False
117 return text, False
120
118
121 def ellipsisrawprocessor(rl, text):
119 def ellipsisrawprocessor(rl, text):
122 return False
120 return False
123
121
124 ellipsisprocessor = (
122 ellipsisprocessor = (
125 ellipsisreadprocessor,
123 ellipsisreadprocessor,
126 ellipsiswriteprocessor,
124 ellipsiswriteprocessor,
127 ellipsisrawprocessor,
125 ellipsisrawprocessor,
128 )
126 )
129
127
130 _mdre = re.compile('\1\n')
128 _mdre = re.compile('\1\n')
131 def parsemeta(text):
129 def parsemeta(text):
132 """return (metadatadict, metadatasize)"""
130 """return (metadatadict, metadatasize)"""
133 # text can be buffer, so we can't use .startswith or .index
131 # text can be buffer, so we can't use .startswith or .index
134 if text[:2] != '\1\n':
132 if text[:2] != '\1\n':
135 return None, None
133 return None, None
136 s = _mdre.search(text, 2).start()
134 s = _mdre.search(text, 2).start()
137 mtext = text[2:s]
135 mtext = text[2:s]
138 meta = {}
136 meta = {}
139 for l in mtext.splitlines():
137 for l in mtext.splitlines():
140 k, v = l.split(": ", 1)
138 k, v = l.split(": ", 1)
141 meta[k] = v
139 meta[k] = v
142 return meta, (s + 2)
140 return meta, (s + 2)
143
141
144 def packmeta(meta, text):
142 def packmeta(meta, text):
145 keys = sorted(meta)
143 keys = sorted(meta)
146 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
144 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
147 return "\1\n%s\1\n%s" % (metatext, text)
145 return "\1\n%s\1\n%s" % (metatext, text)
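A quick round-trip of the metadata framing implemented by parsemeta() and packmeta() above; the "copy" key and filename are made-up example values, and the sketch assumes the Python 2 byte-string semantics the module uses:

blob = packmeta({'copy': 'a.txt'}, 'file contents\n')
# "\1\n" frames the metadata block: \1\ncopy: a.txt\n\1\nfile contents\n
meta, size = parsemeta(blob)
assert meta == {'copy': 'a.txt'}
assert blob[size:] == 'file contents\n'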
148
146
149 def _censoredtext(text):
147 def _censoredtext(text):
150 m, offs = parsemeta(text)
148 m, offs = parsemeta(text)
151 return m and "censored" in m
149 return m and "censored" in m
152
150
153 def addflagprocessor(flag, processor):
151 def addflagprocessor(flag, processor):
154 """Register a flag processor on a revision data flag.
152 """Register a flag processor on a revision data flag.
155
153
156 Invariant:
154 Invariant:
157 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
155 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
158 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
156 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
159 - Only one flag processor can be registered on a specific flag.
157 - Only one flag processor can be registered on a specific flag.
160 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
158 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
161 following signatures:
159 following signatures:
162 - (read) f(self, rawtext) -> text, bool
160 - (read) f(self, rawtext) -> text, bool
163 - (write) f(self, text) -> rawtext, bool
161 - (write) f(self, text) -> rawtext, bool
164 - (raw) f(self, rawtext) -> bool
162 - (raw) f(self, rawtext) -> bool
165 "text" is presented to the user. "rawtext" is stored in revlog data, not
163 "text" is presented to the user. "rawtext" is stored in revlog data, not
166 directly visible to the user.
164 directly visible to the user.
167 The boolean returned by these transforms is used to determine whether
165 The boolean returned by these transforms is used to determine whether
168 the returned text can be used for hash integrity checking. For example,
166 the returned text can be used for hash integrity checking. For example,
169 if "write" returns False, then "text" is used to generate the hash. If
167 if "write" returns False, then "text" is used to generate the hash. If
170 "write" returns True, the "rawtext" returned by "write" should be used
168 "write" returns True, the "rawtext" returned by "write" should be used
171 to generate the hash. Usually, "write" and "read" return different
169 to generate the hash. Usually, "write" and "read" return different
172 booleans, and "raw" returns the same boolean as "write".
170 booleans, and "raw" returns the same boolean as "write".
173
171
174 Note: The 'raw' transform is used for changegroup generation and in some
172 Note: The 'raw' transform is used for changegroup generation and in some
175 debug commands. In this case the transform only indicates whether the
173 debug commands. In this case the transform only indicates whether the
176 contents can be used for hash integrity checks.
174 contents can be used for hash integrity checks.
177 """
175 """
178 if not flag & REVIDX_KNOWN_FLAGS:
176 if not flag & REVIDX_KNOWN_FLAGS:
179 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
177 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
180 raise error.ProgrammingError(msg)
178 raise error.ProgrammingError(msg)
181 if flag not in REVIDX_FLAGS_ORDER:
179 if flag not in REVIDX_FLAGS_ORDER:
182 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
180 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
183 raise error.ProgrammingError(msg)
181 raise error.ProgrammingError(msg)
184 if flag in _flagprocessors:
182 if flag in _flagprocessors:
185 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
183 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
186 raise error.Abort(msg)
184 raise error.Abort(msg)
187 _flagprocessors[flag] = processor
185 _flagprocessors[flag] = processor
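A sketch of registering a processor through addflagprocessor(), loosely modeled on what the lfs extension does with REVIDX_EXTSTORED; the transform bodies are placeholders, not the real lfs logic:

def readtransform(self, rawtext):
    text = rawtext            # placeholder: decode the stored form here
    return text, False        # False: returned text is not hash material

def writetransform(self, text):
    rawtext = text            # placeholder: encode for storage here
    return rawtext, False     # False: hash the user-visible text instead

def rawtransform(self, rawtext):
    return False              # rawtext alone cannot verify the hash

addflagprocessor(REVIDX_EXTSTORED,
                 (readtransform, writetransform, rawtransform))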
188
186
189 def getoffset(q):
187 def getoffset(q):
190 return int(q >> 16)
188 return int(q >> 16)
191
189
192 def gettype(q):
190 def gettype(q):
193 return int(q & 0xFFFF)
191 return int(q & 0xFFFF)
194
192
195 def offset_type(offset, type):
193 def offset_type(offset, type):
196 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
194 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
197 raise ValueError('unknown revlog index flags')
195 raise ValueError('unknown revlog index flags')
198 return int(int(offset) << 16 | type)
196 return int(int(offset) << 16 | type)
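A worked example of the packing above: the byte offset occupies the high bits and the 16 flag bits the low bits of a single index integer.

packed = offset_type(4096, REVIDX_ISCENSORED)
assert packed == 4096 << 16 | REVIDX_ISCENSORED
assert getoffset(packed) == 4096
assert gettype(packed) == REVIDX_ISCENSORED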
199
197
200 _nullhash = hashlib.sha1(nullid)
198 _nullhash = hashlib.sha1(nullid)
201
199
202 def hash(text, p1, p2):
200 def hash(text, p1, p2):
203 """generate a hash from the given text and its parent hashes
201 """generate a hash from the given text and its parent hashes
204
202
205 This hash combines both the current file contents and its history
203 This hash combines both the current file contents and its history
206 in a manner that makes it easy to distinguish nodes with the same
204 in a manner that makes it easy to distinguish nodes with the same
207 content in the revision graph.
205 content in the revision graph.
208 """
206 """
209 # As of now, if one of the parent nodes is null, p2 is null
207 # As of now, if one of the parent nodes is null, p2 is null
210 if p2 == nullid:
208 if p2 == nullid:
211 # deep copy of a hash is faster than creating one
209 # deep copy of a hash is faster than creating one
212 s = _nullhash.copy()
210 s = _nullhash.copy()
213 s.update(p1)
211 s.update(p1)
214 else:
212 else:
215 # none of the parent nodes are nullid
213 # none of the parent nodes are nullid
216 if p1 < p2:
214 if p1 < p2:
217 a = p1
215 a = p1
218 b = p2
216 b = p2
219 else:
217 else:
220 a = p2
218 a = p2
221 b = p1
219 b = p1
222 s = hashlib.sha1(a)
220 s = hashlib.sha1(a)
223 s.update(b)
221 s.update(b)
224 s.update(text)
222 s.update(text)
225 return s.digest()
223 return s.digest()
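A worked example for hash() above; the parent values are made up. Because the parents are sorted before hashing, argument order never changes the resulting nodeid:

p1 = '\x11' * 20
p2 = '\x22' * 20
assert hash('contents', p1, p2) == hash('contents', p2, p1)
assert hash('contents', p1, p2) == hashlib.sha1(
    p1 + p2 + 'contents').digest()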
226
224
227 @attr.s(slots=True, frozen=True)
225 @attr.s(slots=True, frozen=True)
228 class _revisioninfo(object):
226 class _revisioninfo(object):
229 """Information about a revision that allows building its fulltext
227 """Information about a revision that allows building its fulltext
230 node: expected hash of the revision
228 node: expected hash of the revision
231 p1, p2: parent revs of the revision
229 p1, p2: parent revs of the revision
232 btext: built text cache consisting of a one-element list
230 btext: built text cache consisting of a one-element list
233 cachedelta: (baserev, uncompressed_delta) or None
231 cachedelta: (baserev, uncompressed_delta) or None
234 flags: flags associated to the revision storage
232 flags: flags associated to the revision storage
235
233
236 One of btext[0] or cachedelta must be set.
234 One of btext[0] or cachedelta must be set.
237 """
235 """
238 node = attr.ib()
236 node = attr.ib()
239 p1 = attr.ib()
237 p1 = attr.ib()
240 p2 = attr.ib()
238 p2 = attr.ib()
241 btext = attr.ib()
239 btext = attr.ib()
242 textlen = attr.ib()
240 textlen = attr.ib()
243 cachedelta = attr.ib()
241 cachedelta = attr.ib()
244 flags = attr.ib()
242 flags = attr.ib()
245
243
246 @interfaceutil.implementer(repository.irevisiondelta)
244 @interfaceutil.implementer(repository.irevisiondelta)
247 @attr.s(slots=True, frozen=True)
245 @attr.s(slots=True, frozen=True)
248 class revlogrevisiondelta(object):
246 class revlogrevisiondelta(object):
249 node = attr.ib()
247 node = attr.ib()
250 p1node = attr.ib()
248 p1node = attr.ib()
251 p2node = attr.ib()
249 p2node = attr.ib()
252 basenode = attr.ib()
250 basenode = attr.ib()
253 linknode = attr.ib()
251 linknode = attr.ib()
254 flags = attr.ib()
252 flags = attr.ib()
255 baserevisionsize = attr.ib()
253 baserevisionsize = attr.ib()
256 revision = attr.ib()
254 revision = attr.ib()
257 delta = attr.ib()
255 delta = attr.ib()
258
256
259 # index v0:
257 # index v0:
260 # 4 bytes: offset
258 # 4 bytes: offset
261 # 4 bytes: compressed length
259 # 4 bytes: compressed length
262 # 4 bytes: base rev
260 # 4 bytes: base rev
263 # 4 bytes: link rev
261 # 4 bytes: link rev
264 # 20 bytes: parent 1 nodeid
262 # 20 bytes: parent 1 nodeid
265 # 20 bytes: parent 2 nodeid
263 # 20 bytes: parent 2 nodeid
266 # 20 bytes: nodeid
264 # 20 bytes: nodeid
267 indexformatv0 = struct.Struct(">4l20s20s20s")
265 indexformatv0 = struct.Struct(">4l20s20s20s")
268 indexformatv0_pack = indexformatv0.pack
266 indexformatv0_pack = indexformatv0.pack
269 indexformatv0_unpack = indexformatv0.unpack
267 indexformatv0_unpack = indexformatv0.unpack
270
268
271 class revlogoldindex(list):
269 class revlogoldindex(list):
272 def __getitem__(self, i):
270 def __getitem__(self, i):
273 if i == -1:
271 if i == -1:
274 return (0, 0, 0, -1, -1, -1, -1, nullid)
272 return (0, 0, 0, -1, -1, -1, -1, nullid)
275 return list.__getitem__(self, i)
273 return list.__getitem__(self, i)
276
274
277 class revlogoldio(object):
275 class revlogoldio(object):
278 def __init__(self):
276 def __init__(self):
279 self.size = indexformatv0.size
277 self.size = indexformatv0.size
280
278
281 def parseindex(self, data, inline):
279 def parseindex(self, data, inline):
282 s = self.size
280 s = self.size
283 index = []
281 index = []
284 nodemap = {nullid: nullrev}
282 nodemap = {nullid: nullrev}
285 n = off = 0
283 n = off = 0
286 l = len(data)
284 l = len(data)
287 while off + s <= l:
285 while off + s <= l:
288 cur = data[off:off + s]
286 cur = data[off:off + s]
289 off += s
287 off += s
290 e = indexformatv0_unpack(cur)
288 e = indexformatv0_unpack(cur)
291 # transform to revlogv1 format
289 # transform to revlogv1 format
292 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
290 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
293 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
291 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
294 index.append(e2)
292 index.append(e2)
295 nodemap[e[6]] = n
293 nodemap[e[6]] = n
296 n += 1
294 n += 1
297
295
298 return revlogoldindex(index), nodemap, None
296 return revlogoldindex(index), nodemap, None
299
297
300 def packentry(self, entry, node, version, rev):
298 def packentry(self, entry, node, version, rev):
301 if gettype(entry[0]):
299 if gettype(entry[0]):
302 raise error.RevlogError(_('index entry flags need revlog '
300 raise error.RevlogError(_('index entry flags need revlog '
303 'version 1'))
301 'version 1'))
304 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
302 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
305 node(entry[5]), node(entry[6]), entry[7])
303 node(entry[5]), node(entry[6]), entry[7])
306 return indexformatv0_pack(*e2)
304 return indexformatv0_pack(*e2)
307
305
308 # index ng:
306 # index ng:
309 # 6 bytes: offset
307 # 6 bytes: offset
310 # 2 bytes: flags
308 # 2 bytes: flags
311 # 4 bytes: compressed length
309 # 4 bytes: compressed length
312 # 4 bytes: uncompressed length
310 # 4 bytes: uncompressed length
313 # 4 bytes: base rev
311 # 4 bytes: base rev
314 # 4 bytes: link rev
312 # 4 bytes: link rev
315 # 4 bytes: parent 1 rev
313 # 4 bytes: parent 1 rev
316 # 4 bytes: parent 2 rev
314 # 4 bytes: parent 2 rev
317 # 32 bytes: nodeid
315 # 32 bytes: nodeid
318 indexformatng = struct.Struct(">Qiiiiii20s12x")
316 indexformatng = struct.Struct(">Qiiiiii20s12x")
319 indexformatng_pack = indexformatng.pack
317 indexformatng_pack = indexformatng.pack
320 versionformat = struct.Struct(">I")
318 versionformat = struct.Struct(">I")
321 versionformat_pack = versionformat.pack
319 versionformat_pack = versionformat.pack
322 versionformat_unpack = versionformat.unpack
320 versionformat_unpack = versionformat.unpack
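A size sanity check for the two entry formats above: a v0 entry packs to 4*4 + 3*20 = 76 bytes, and a v1 ("ng") entry to 8 + 6*4 + 20 + 12 = 64 bytes, where the 12x pad reserves room for future 32-byte hashes. The entry values below are made up:

assert indexformatv0.size == 76
assert indexformatng.size == 64
# v1 entry: (offset/flags, complen, rawlen, base, link, p1, p2, node)
entry = (offset_type(0, 0), 11, 12, 0, 0, -1, -1, '\x11' * 20)
assert len(indexformatng_pack(*entry)) == 64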
323
321
324 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
322 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
325 # signed integer)
323 # signed integer)
326 _maxentrysize = 0x7fffffff
324 _maxentrysize = 0x7fffffff
327
325
328 class revlogio(object):
326 class revlogio(object):
329 def __init__(self):
327 def __init__(self):
330 self.size = indexformatng.size
328 self.size = indexformatng.size
331
329
332 def parseindex(self, data, inline):
330 def parseindex(self, data, inline):
333 # call the C implementation to parse the index data
331 # call the C implementation to parse the index data
334 index, cache = parsers.parse_index2(data, inline)
332 index, cache = parsers.parse_index2(data, inline)
335 return index, getattr(index, 'nodemap', None), cache
333 return index, getattr(index, 'nodemap', None), cache
336
334
337 def packentry(self, entry, node, version, rev):
335 def packentry(self, entry, node, version, rev):
338 p = indexformatng_pack(*entry)
336 p = indexformatng_pack(*entry)
339 if rev == 0:
337 if rev == 0:
340 p = versionformat_pack(version) + p[4:]
338 p = versionformat_pack(version) + p[4:]
341 return p
339 return p
342
340
343 class revlog(object):
341 class revlog(object):
344 """
342 """
345 the underlying revision storage object
343 the underlying revision storage object
346
344
347 A revlog consists of two parts, an index and the revision data.
345 A revlog consists of two parts, an index and the revision data.
348
346
349 The index is a file with a fixed record size containing
347 The index is a file with a fixed record size containing
350 information on each revision, including its nodeid (hash), the
348 information on each revision, including its nodeid (hash), the
351 nodeids of its parents, the position and offset of its data within
349 nodeids of its parents, the position and offset of its data within
352 the data file, and the revision it's based on. Finally, each entry
350 the data file, and the revision it's based on. Finally, each entry
353 contains a linkrev entry that can serve as a pointer to external
351 contains a linkrev entry that can serve as a pointer to external
354 data.
352 data.
355
353
356 The revision data itself is a linear collection of data chunks.
354 The revision data itself is a linear collection of data chunks.
357 Each chunk represents a revision and is usually represented as a
355 Each chunk represents a revision and is usually represented as a
358 delta against the previous chunk. To bound lookup time, runs of
356 delta against the previous chunk. To bound lookup time, runs of
359 deltas are limited to about 2 times the length of the original
357 deltas are limited to about 2 times the length of the original
360 version data. This makes retrieval of a version proportional to
358 version data. This makes retrieval of a version proportional to
361 its size, or O(1) relative to the number of revisions.
359 its size, or O(1) relative to the number of revisions.
362
360
363 Both pieces of the revlog are written to in an append-only
361 Both pieces of the revlog are written to in an append-only
364 fashion, which means we never need to rewrite a file to insert or
362 fashion, which means we never need to rewrite a file to insert or
365 remove data, and can use some simple techniques to avoid the need
363 remove data, and can use some simple techniques to avoid the need
366 for locking while reading.
364 for locking while reading.
367
365
368 If checkambig, indexfile is opened with checkambig=True at
366 If checkambig, indexfile is opened with checkambig=True at
369 writing, to avoid file stat ambiguity.
367 writing, to avoid file stat ambiguity.
370
368
371 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 If mmaplargeindex is True, and an mmapindexthreshold is set, the
372 index will be mmapped rather than read if it is larger than the
370 index will be mmapped rather than read if it is larger than the
373 configured threshold.
371 configured threshold.
374
372
375 If censorable is True, the revlog can have censored revisions.
373 If censorable is True, the revlog can have censored revisions.
376 """
374 """
377 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
375 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
378 mmaplargeindex=False, censorable=False):
376 mmaplargeindex=False, censorable=False):
379 """
377 """
380 create a revlog object
378 create a revlog object
381
379
382 opener is a function that abstracts the file opening operation
380 opener is a function that abstracts the file opening operation
383 and can be used to implement COW semantics or the like.
381 and can be used to implement COW semantics or the like.
384 """
382 """
385 self.indexfile = indexfile
383 self.indexfile = indexfile
386 self.datafile = datafile or (indexfile[:-2] + ".d")
384 self.datafile = datafile or (indexfile[:-2] + ".d")
387 self.opener = opener
385 self.opener = opener
388 # When True, indexfile is opened with checkambig=True at writing, to
386 # When True, indexfile is opened with checkambig=True at writing, to
389 # avoid file stat ambiguity.
387 # avoid file stat ambiguity.
390 self._checkambig = checkambig
388 self._checkambig = checkambig
391 self._censorable = censorable
389 self._censorable = censorable
392 # 3-tuple of (node, rev, text) for a raw revision.
390 # 3-tuple of (node, rev, text) for a raw revision.
393 self._cache = None
391 self._cache = None
394 # Maps rev to chain base rev.
392 # Maps rev to chain base rev.
395 self._chainbasecache = util.lrucachedict(100)
393 self._chainbasecache = util.lrucachedict(100)
396 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
394 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
397 self._chunkcache = (0, '')
395 self._chunkcache = (0, '')
398 # How much data to read and cache into the raw revlog data cache.
396 # How much data to read and cache into the raw revlog data cache.
399 self._chunkcachesize = 65536
397 self._chunkcachesize = 65536
400 self._maxchainlen = None
398 self._maxchainlen = None
401 self._deltabothparents = True
399 self._deltabothparents = True
402 self.index = []
400 self.index = []
403 # Mapping of partial identifiers to full nodes.
401 # Mapping of partial identifiers to full nodes.
404 self._pcache = {}
402 self._pcache = {}
405 # Mapping of revision integer to full node.
403 # Mapping of revision integer to full node.
406 self._nodecache = {nullid: nullrev}
404 self._nodecache = {nullid: nullrev}
407 self._nodepos = None
405 self._nodepos = None
408 self._compengine = 'zlib'
406 self._compengine = 'zlib'
409 self._maxdeltachainspan = -1
407 self._maxdeltachainspan = -1
410 self._withsparseread = False
408 self._withsparseread = False
411 self._sparserevlog = False
409 self._sparserevlog = False
412 self._srdensitythreshold = 0.50
410 self._srdensitythreshold = 0.50
413 self._srmingapsize = 262144
411 self._srmingapsize = 262144
414
412
415 # Make copy of flag processors so each revlog instance can support
413 # Make copy of flag processors so each revlog instance can support
416 # custom flags.
414 # custom flags.
417 self._flagprocessors = dict(_flagprocessors)
415 self._flagprocessors = dict(_flagprocessors)
418
416
419 mmapindexthreshold = None
417 mmapindexthreshold = None
420 v = REVLOG_DEFAULT_VERSION
418 v = REVLOG_DEFAULT_VERSION
421 opts = getattr(opener, 'options', None)
419 opts = getattr(opener, 'options', None)
422 if opts is not None:
420 if opts is not None:
423 if 'revlogv2' in opts:
421 if 'revlogv2' in opts:
424 # version 2 revlogs always use generaldelta.
422 # version 2 revlogs always use generaldelta.
425 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
423 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
426 elif 'revlogv1' in opts:
424 elif 'revlogv1' in opts:
427 if 'generaldelta' in opts:
425 if 'generaldelta' in opts:
428 v |= FLAG_GENERALDELTA
426 v |= FLAG_GENERALDELTA
429 else:
427 else:
430 v = 0
428 v = 0
431 if 'chunkcachesize' in opts:
429 if 'chunkcachesize' in opts:
432 self._chunkcachesize = opts['chunkcachesize']
430 self._chunkcachesize = opts['chunkcachesize']
433 if 'maxchainlen' in opts:
431 if 'maxchainlen' in opts:
434 self._maxchainlen = opts['maxchainlen']
432 self._maxchainlen = opts['maxchainlen']
435 if 'deltabothparents' in opts:
433 if 'deltabothparents' in opts:
436 self._deltabothparents = opts['deltabothparents']
434 self._deltabothparents = opts['deltabothparents']
437 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
435 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
438 if 'compengine' in opts:
436 if 'compengine' in opts:
439 self._compengine = opts['compengine']
437 self._compengine = opts['compengine']
440 if 'maxdeltachainspan' in opts:
438 if 'maxdeltachainspan' in opts:
441 self._maxdeltachainspan = opts['maxdeltachainspan']
439 self._maxdeltachainspan = opts['maxdeltachainspan']
442 if mmaplargeindex and 'mmapindexthreshold' in opts:
440 if mmaplargeindex and 'mmapindexthreshold' in opts:
443 mmapindexthreshold = opts['mmapindexthreshold']
441 mmapindexthreshold = opts['mmapindexthreshold']
444 self._sparserevlog = bool(opts.get('sparse-revlog', False))
442 self._sparserevlog = bool(opts.get('sparse-revlog', False))
445 withsparseread = bool(opts.get('with-sparse-read', False))
443 withsparseread = bool(opts.get('with-sparse-read', False))
446 # sparse-revlog forces sparse-read
444 # sparse-revlog forces sparse-read
447 self._withsparseread = self._sparserevlog or withsparseread
445 self._withsparseread = self._sparserevlog or withsparseread
448 if 'sparse-read-density-threshold' in opts:
446 if 'sparse-read-density-threshold' in opts:
449 self._srdensitythreshold = opts['sparse-read-density-threshold']
447 self._srdensitythreshold = opts['sparse-read-density-threshold']
450 if 'sparse-read-min-gap-size' in opts:
448 if 'sparse-read-min-gap-size' in opts:
451 self._srmingapsize = opts['sparse-read-min-gap-size']
449 self._srmingapsize = opts['sparse-read-min-gap-size']
452 if opts.get('enableellipsis'):
450 if opts.get('enableellipsis'):
453 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
451 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
454
452
455 if self._chunkcachesize <= 0:
453 if self._chunkcachesize <= 0:
456 raise error.RevlogError(_('revlog chunk cache size %r is not '
454 raise error.RevlogError(_('revlog chunk cache size %r is not '
457 'greater than 0') % self._chunkcachesize)
455 'greater than 0') % self._chunkcachesize)
458 elif self._chunkcachesize & (self._chunkcachesize - 1):
456 elif self._chunkcachesize & (self._chunkcachesize - 1):
459 raise error.RevlogError(_('revlog chunk cache size %r is not a '
457 raise error.RevlogError(_('revlog chunk cache size %r is not a '
460 'power of 2') % self._chunkcachesize)
458 'power of 2') % self._chunkcachesize)
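The power-of-two validation above uses the standard bit trick: clearing the lowest set bit of x yields zero iff x is a power of two.

for good in (1, 2, 65536):
    assert good & (good - 1) == 0
for bad in (3, 6, 65537):
    assert bad & (bad - 1) != 0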
461
459
462 indexdata = ''
460 indexdata = ''
463 self._initempty = True
461 self._initempty = True
464 try:
462 try:
465 with self._indexfp() as f:
463 with self._indexfp() as f:
466 if (mmapindexthreshold is not None and
464 if (mmapindexthreshold is not None and
467 self.opener.fstat(f).st_size >= mmapindexthreshold):
465 self.opener.fstat(f).st_size >= mmapindexthreshold):
468 indexdata = util.buffer(util.mmapread(f))
466 indexdata = util.buffer(util.mmapread(f))
469 else:
467 else:
470 indexdata = f.read()
468 indexdata = f.read()
471 if len(indexdata) > 0:
469 if len(indexdata) > 0:
472 v = versionformat_unpack(indexdata[:4])[0]
470 v = versionformat_unpack(indexdata[:4])[0]
473 self._initempty = False
471 self._initempty = False
474 except IOError as inst:
472 except IOError as inst:
475 if inst.errno != errno.ENOENT:
473 if inst.errno != errno.ENOENT:
476 raise
474 raise
477
475
478 self.version = v
476 self.version = v
479 self._inline = v & FLAG_INLINE_DATA
477 self._inline = v & FLAG_INLINE_DATA
480 self._generaldelta = v & FLAG_GENERALDELTA
478 self._generaldelta = v & FLAG_GENERALDELTA
481 flags = v & ~0xFFFF
479 flags = v & ~0xFFFF
482 fmt = v & 0xFFFF
480 fmt = v & 0xFFFF
483 if fmt == REVLOGV0:
481 if fmt == REVLOGV0:
484 if flags:
482 if flags:
485 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
483 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
486 'revlog %s') %
484 'revlog %s') %
487 (flags >> 16, fmt, self.indexfile))
485 (flags >> 16, fmt, self.indexfile))
488 elif fmt == REVLOGV1:
486 elif fmt == REVLOGV1:
489 if flags & ~REVLOGV1_FLAGS:
487 if flags & ~REVLOGV1_FLAGS:
490 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
488 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
491 'revlog %s') %
489 'revlog %s') %
492 (flags >> 16, fmt, self.indexfile))
490 (flags >> 16, fmt, self.indexfile))
493 elif fmt == REVLOGV2:
491 elif fmt == REVLOGV2:
494 if flags & ~REVLOGV2_FLAGS:
492 if flags & ~REVLOGV2_FLAGS:
495 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
493 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
496 'revlog %s') %
494 'revlog %s') %
497 (flags >> 16, fmt, self.indexfile))
495 (flags >> 16, fmt, self.indexfile))
498 else:
496 else:
499 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
497 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
500 (fmt, self.indexfile))
498 (fmt, self.indexfile))
501
499
502 self._storedeltachains = True
500 self._storedeltachains = True
503
501
504 self._io = revlogio()
502 self._io = revlogio()
505 if self.version == REVLOGV0:
503 if self.version == REVLOGV0:
506 self._io = revlogoldio()
504 self._io = revlogoldio()
507 try:
505 try:
508 d = self._io.parseindex(indexdata, self._inline)
506 d = self._io.parseindex(indexdata, self._inline)
509 except (ValueError, IndexError):
507 except (ValueError, IndexError):
510 raise error.RevlogError(_("index %s is corrupted") %
508 raise error.RevlogError(_("index %s is corrupted") %
511 self.indexfile)
509 self.indexfile)
512 self.index, nodemap, self._chunkcache = d
510 self.index, nodemap, self._chunkcache = d
513 if nodemap is not None:
511 if nodemap is not None:
514 self.nodemap = self._nodecache = nodemap
512 self.nodemap = self._nodecache = nodemap
515 if not self._chunkcache:
513 if not self._chunkcache:
516 self._chunkclear()
514 self._chunkclear()
517 # revnum -> (chain-length, sum-delta-length)
515 # revnum -> (chain-length, sum-delta-length)
518 self._chaininfocache = {}
516 self._chaininfocache = {}
519 # revlog header -> revlog compressor
517 # revlog header -> revlog compressor
520 self._decompressors = {}
518 self._decompressors = {}
521
519
522 @util.propertycache
520 @util.propertycache
523 def _compressor(self):
521 def _compressor(self):
524 return util.compengines[self._compengine].revlogcompressor()
522 return util.compengines[self._compengine].revlogcompressor()
525
523
526 def _indexfp(self, mode='r'):
524 def _indexfp(self, mode='r'):
527 """file object for the revlog's index file"""
525 """file object for the revlog's index file"""
528 args = {r'mode': mode}
526 args = {r'mode': mode}
529 if mode != 'r':
527 if mode != 'r':
530 args[r'checkambig'] = self._checkambig
528 args[r'checkambig'] = self._checkambig
531 if mode == 'w':
529 if mode == 'w':
532 args[r'atomictemp'] = True
530 args[r'atomictemp'] = True
533 return self.opener(self.indexfile, **args)
531 return self.opener(self.indexfile, **args)
534
532
535 def _datafp(self, mode='r'):
533 def _datafp(self, mode='r'):
536 """file object for the revlog's data file"""
534 """file object for the revlog's data file"""
537 return self.opener(self.datafile, mode=mode)
535 return self.opener(self.datafile, mode=mode)
538
536
539 @contextlib.contextmanager
537 @contextlib.contextmanager
540 def _datareadfp(self, existingfp=None):
538 def _datareadfp(self, existingfp=None):
541 """file object suitable to read data"""
539 """file object suitable to read data"""
542 if existingfp is not None:
540 if existingfp is not None:
543 yield existingfp
541 yield existingfp
544 else:
542 else:
545 if self._inline:
543 if self._inline:
546 func = self._indexfp
544 func = self._indexfp
547 else:
545 else:
548 func = self._datafp
546 func = self._datafp
549 with func() as fp:
547 with func() as fp:
550 yield fp
548 yield fp
551
549
552 def tip(self):
550 def tip(self):
553 return self.node(len(self.index) - 1)
551 return self.node(len(self.index) - 1)
554 def __contains__(self, rev):
552 def __contains__(self, rev):
555 return 0 <= rev < len(self)
553 return 0 <= rev < len(self)
556 def __len__(self):
554 def __len__(self):
557 return len(self.index)
555 return len(self.index)
558 def __iter__(self):
556 def __iter__(self):
559 return iter(pycompat.xrange(len(self)))
557 return iter(pycompat.xrange(len(self)))
560 def revs(self, start=0, stop=None):
558 def revs(self, start=0, stop=None):
561 """iterate over all rev in this revlog (from start to stop)"""
559 """iterate over all rev in this revlog (from start to stop)"""
562 step = 1
560 step = 1
563 length = len(self)
561 length = len(self)
564 if stop is not None:
562 if stop is not None:
565 if start > stop:
563 if start > stop:
566 step = -1
564 step = -1
567 stop += step
565 stop += step
568 if stop > length:
566 if stop > length:
569 stop = length
567 stop = length
570 else:
568 else:
571 stop = length
569 stop = length
572 return pycompat.xrange(start, stop, step)
570 return pycompat.xrange(start, stop, step)
573
571
574 @util.propertycache
572 @util.propertycache
575 def nodemap(self):
573 def nodemap(self):
576 if self.index:
574 if self.index:
577 # populate mapping down to the initial node
575 # populate mapping down to the initial node
578 node0 = self.index[0][7] # get around changelog filtering
576 node0 = self.index[0][7] # get around changelog filtering
579 self.rev(node0)
577 self.rev(node0)
580 return self._nodecache
578 return self._nodecache
581
579
582 def hasnode(self, node):
580 def hasnode(self, node):
583 try:
581 try:
584 self.rev(node)
582 self.rev(node)
585 return True
583 return True
586 except KeyError:
584 except KeyError:
587 return False
585 return False
588
586
589 def candelta(self, baserev, rev):
587 def candelta(self, baserev, rev):
590 """whether two revisions (baserev, rev) can be delta-ed or not"""
588 """whether two revisions (baserev, rev) can be delta-ed or not"""
591 # Disable delta if either rev requires a content-changing flag
589 # Disable delta if either rev requires a content-changing flag
592 # processor (ex. LFS). This is because such a flag processor can alter
590 # processor (ex. LFS). This is because such a flag processor can alter
593 # the rawtext content that the delta will be based on, and two clients
591 # the rawtext content that the delta will be based on, and two clients
594 # could have the same revlog node with different flags (i.e. different
592 # could have the same revlog node with different flags (i.e. different
595 # rawtext contents) and the delta could be incompatible.
593 # rawtext contents) and the delta could be incompatible.
596 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
594 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
597 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
595 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
598 return False
596 return False
599 return True
597 return True
600
598
601 def clearcaches(self):
599 def clearcaches(self):
602 self._cache = None
600 self._cache = None
603 self._chainbasecache.clear()
601 self._chainbasecache.clear()
604 self._chunkcache = (0, '')
602 self._chunkcache = (0, '')
605 self._pcache = {}
603 self._pcache = {}
606
604
607 try:
605 try:
608 self._nodecache.clearcaches()
606 self._nodecache.clearcaches()
609 except AttributeError:
607 except AttributeError:
610 self._nodecache = {nullid: nullrev}
608 self._nodecache = {nullid: nullrev}
611 self._nodepos = None
609 self._nodepos = None
612
610
613 def rev(self, node):
611 def rev(self, node):
614 try:
612 try:
615 return self._nodecache[node]
613 return self._nodecache[node]
616 except TypeError:
614 except TypeError:
617 raise
615 raise
618 except error.RevlogError:
616 except error.RevlogError:
619 # parsers.c radix tree lookup failed
617 # parsers.c radix tree lookup failed
620 if node == wdirid or node in wdirfilenodeids:
618 if node == wdirid or node in wdirfilenodeids:
621 raise error.WdirUnsupported
619 raise error.WdirUnsupported
622 raise LookupError(node, self.indexfile, _('no node'))
620 raise error.LookupError(node, self.indexfile, _('no node'))
623 except KeyError:
621 except KeyError:
624 # pure python cache lookup failed
622 # pure python cache lookup failed
625 n = self._nodecache
623 n = self._nodecache
626 i = self.index
624 i = self.index
627 p = self._nodepos
625 p = self._nodepos
628 if p is None:
626 if p is None:
629 p = len(i) - 1
627 p = len(i) - 1
630 else:
628 else:
631 assert p < len(i)
629 assert p < len(i)
632 for r in pycompat.xrange(p, -1, -1):
630 for r in pycompat.xrange(p, -1, -1):
633 v = i[r][7]
631 v = i[r][7]
634 n[v] = r
632 n[v] = r
635 if v == node:
633 if v == node:
636 self._nodepos = r - 1
634 self._nodepos = r - 1
637 return r
635 return r
638 if node == wdirid or node in wdirfilenodeids:
636 if node == wdirid or node in wdirfilenodeids:
639 raise error.WdirUnsupported
637 raise error.WdirUnsupported
640 raise LookupError(node, self.indexfile, _('no node'))
638 raise error.LookupError(node, self.indexfile, _('no node'))
641
639
642 # Accessors for index entries.
640 # Accessors for index entries.
643
641
644 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
642 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
645 # are flags.
643 # are flags.
646 def start(self, rev):
644 def start(self, rev):
647 return int(self.index[rev][0] >> 16)
645 return int(self.index[rev][0] >> 16)
648
646
649 def flags(self, rev):
647 def flags(self, rev):
650 return self.index[rev][0] & 0xFFFF
648 return self.index[rev][0] & 0xFFFF
651
649
652 def length(self, rev):
650 def length(self, rev):
653 return self.index[rev][1]
651 return self.index[rev][1]
654
652
655 def rawsize(self, rev):
653 def rawsize(self, rev):
656 """return the length of the uncompressed text for a given revision"""
654 """return the length of the uncompressed text for a given revision"""
657 l = self.index[rev][2]
655 l = self.index[rev][2]
658 if l >= 0:
656 if l >= 0:
659 return l
657 return l
660
658
661 t = self.revision(rev, raw=True)
659 t = self.revision(rev, raw=True)
662 return len(t)
660 return len(t)
663
661
664 def size(self, rev):
662 def size(self, rev):
665 """length of non-raw text (processed by a "read" flag processor)"""
663 """length of non-raw text (processed by a "read" flag processor)"""
666 # fast path: if no "read" flag processor could change the content,
664 # fast path: if no "read" flag processor could change the content,
667 # size is rawsize. note: ELLIPSIS is known to not change the content.
665 # size is rawsize. note: ELLIPSIS is known to not change the content.
668 flags = self.flags(rev)
666 flags = self.flags(rev)
669 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
667 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
670 return self.rawsize(rev)
668 return self.rawsize(rev)
671
669
672 return len(self.revision(rev, raw=False))
670 return len(self.revision(rev, raw=False))
673
671
674 def chainbase(self, rev):
672 def chainbase(self, rev):
675 base = self._chainbasecache.get(rev)
673 base = self._chainbasecache.get(rev)
676 if base is not None:
674 if base is not None:
677 return base
675 return base
678
676
679 index = self.index
677 index = self.index
680 iterrev = rev
678 iterrev = rev
681 base = index[iterrev][3]
679 base = index[iterrev][3]
682 while base != iterrev:
680 while base != iterrev:
683 iterrev = base
681 iterrev = base
684 base = index[iterrev][3]
682 base = index[iterrev][3]
685
683
686 self._chainbasecache[rev] = base
684 self._chainbasecache[rev] = base
687 return base
685 return base
688
686
689 def linkrev(self, rev):
687 def linkrev(self, rev):
690 return self.index[rev][4]
688 return self.index[rev][4]
691
689
692 def parentrevs(self, rev):
690 def parentrevs(self, rev):
693 try:
691 try:
694 entry = self.index[rev]
692 entry = self.index[rev]
695 except IndexError:
693 except IndexError:
696 if rev == wdirrev:
694 if rev == wdirrev:
697 raise error.WdirUnsupported
695 raise error.WdirUnsupported
698 raise
696 raise
699
697
700 return entry[5], entry[6]
698 return entry[5], entry[6]
701
699
702 def node(self, rev):
700 def node(self, rev):
703 try:
701 try:
704 return self.index[rev][7]
702 return self.index[rev][7]
705 except IndexError:
703 except IndexError:
706 if rev == wdirrev:
704 if rev == wdirrev:
707 raise error.WdirUnsupported
705 raise error.WdirUnsupported
708 raise
706 raise
709
707
710 # Derived from index values.
708 # Derived from index values.
711
709
712 def end(self, rev):
710 def end(self, rev):
713 return self.start(rev) + self.length(rev)
711 return self.start(rev) + self.length(rev)
714
712
715 def parents(self, node):
713 def parents(self, node):
716 i = self.index
714 i = self.index
717 d = i[self.rev(node)]
715 d = i[self.rev(node)]
718 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
716 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
719
717
720 def chainlen(self, rev):
718 def chainlen(self, rev):
721 return self._chaininfo(rev)[0]
719 return self._chaininfo(rev)[0]
722
720
723 def _chaininfo(self, rev):
721 def _chaininfo(self, rev):
724 chaininfocache = self._chaininfocache
722 chaininfocache = self._chaininfocache
725 if rev in chaininfocache:
723 if rev in chaininfocache:
726 return chaininfocache[rev]
724 return chaininfocache[rev]
727 index = self.index
725 index = self.index
728 generaldelta = self._generaldelta
726 generaldelta = self._generaldelta
729 iterrev = rev
727 iterrev = rev
730 e = index[iterrev]
728 e = index[iterrev]
731 clen = 0
729 clen = 0
732 compresseddeltalen = 0
730 compresseddeltalen = 0
733 while iterrev != e[3]:
731 while iterrev != e[3]:
734 clen += 1
732 clen += 1
735 compresseddeltalen += e[1]
733 compresseddeltalen += e[1]
736 if generaldelta:
734 if generaldelta:
737 iterrev = e[3]
735 iterrev = e[3]
738 else:
736 else:
739 iterrev -= 1
737 iterrev -= 1
740 if iterrev in chaininfocache:
738 if iterrev in chaininfocache:
741 t = chaininfocache[iterrev]
739 t = chaininfocache[iterrev]
742 clen += t[0]
740 clen += t[0]
743 compresseddeltalen += t[1]
741 compresseddeltalen += t[1]
744 break
742 break
745 e = index[iterrev]
743 e = index[iterrev]
746 else:
744 else:
747 # Add text length of base since decompressing that also takes
745 # Add text length of base since decompressing that also takes
748 # work. For cache hits the length is already included.
746 # work. For cache hits the length is already included.
749 compresseddeltalen += e[1]
747 compresseddeltalen += e[1]
750 r = (clen, compresseddeltalen)
748 r = (clen, compresseddeltalen)
751 chaininfocache[rev] = r
749 chaininfocache[rev] = r
752 return r
750 return r
753
751
754 def _deltachain(self, rev, stoprev=None):
752 def _deltachain(self, rev, stoprev=None):
755 """Obtain the delta chain for a revision.
753 """Obtain the delta chain for a revision.
756
754
757 ``stoprev`` specifies a revision to stop at. If not specified, we
755 ``stoprev`` specifies a revision to stop at. If not specified, we
758 stop at the base of the chain.
756 stop at the base of the chain.
759
757
760 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
758 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
761 revs in ascending order and ``stopped`` is a bool indicating whether
759 revs in ascending order and ``stopped`` is a bool indicating whether
762 ``stoprev`` was hit.
760 ``stoprev`` was hit.
763 """
761 """
764 # Try C implementation.
762 # Try C implementation.
765 try:
763 try:
766 return self.index.deltachain(rev, stoprev, self._generaldelta)
764 return self.index.deltachain(rev, stoprev, self._generaldelta)
767 except AttributeError:
765 except AttributeError:
768 pass
766 pass
769
767
770 chain = []
768 chain = []
771
769
772 # Alias to prevent attribute lookup in tight loop.
770 # Alias to prevent attribute lookup in tight loop.
773 index = self.index
771 index = self.index
774 generaldelta = self._generaldelta
772 generaldelta = self._generaldelta
775
773
776 iterrev = rev
774 iterrev = rev
777 e = index[iterrev]
775 e = index[iterrev]
778 while iterrev != e[3] and iterrev != stoprev:
776 while iterrev != e[3] and iterrev != stoprev:
779 chain.append(iterrev)
777 chain.append(iterrev)
780 if generaldelta:
778 if generaldelta:
781 iterrev = e[3]
779 iterrev = e[3]
782 else:
780 else:
783 iterrev -= 1
781 iterrev -= 1
784 e = index[iterrev]
782 e = index[iterrev]
785
783
786 if iterrev == stoprev:
784 if iterrev == stoprev:
787 stopped = True
785 stopped = True
788 else:
786 else:
789 chain.append(iterrev)
787 chain.append(iterrev)
790 stopped = False
788 stopped = False
791
789
792 chain.reverse()
790 chain.reverse()
793 return chain, stopped
791 return chain, stopped
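A toy rendering of the chain walk performed by the pure-Python fallback above, with a hand-built base-pointer table standing in for index entry[3] under generaldelta; a rev whose base is itself stores a full snapshot:

base = {0: 0, 1: 0, 2: 1, 3: 3}     # made-up delta base pointers

def toychain(rev):
    chain = []
    while base[rev] != rev:
        chain.append(rev)
        rev = base[rev]
    chain.append(rev)               # the snapshot ends the chain
    chain.reverse()
    return chain

assert toychain(2) == [0, 1, 2]     # delta on delta on snapshot
assert toychain(3) == [3]           # rev 3 is itself a snapshot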
794
792
795 def ancestors(self, revs, stoprev=0, inclusive=False):
793 def ancestors(self, revs, stoprev=0, inclusive=False):
796 """Generate the ancestors of 'revs' in reverse topological order.
794 """Generate the ancestors of 'revs' in reverse topological order.
797 Does not generate revs lower than stoprev.
795 Does not generate revs lower than stoprev.
798
796
799 See the documentation for ancestor.lazyancestors for more details."""
797 See the documentation for ancestor.lazyancestors for more details."""
800
798
801 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
799 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
802 inclusive=inclusive)
800 inclusive=inclusive)
803
801
804 def descendants(self, revs):
802 def descendants(self, revs):
805 """Generate the descendants of 'revs' in revision order.
803 """Generate the descendants of 'revs' in revision order.
806
804
807 Yield a sequence of revision numbers starting with a child of
805 Yield a sequence of revision numbers starting with a child of
808 some rev in revs, i.e., each revision is *not* considered a
806 some rev in revs, i.e., each revision is *not* considered a
809 descendant of itself. Results are ordered by revision number (a
807 descendant of itself. Results are ordered by revision number (a
810 topological sort)."""
808 topological sort)."""
811 first = min(revs)
809 first = min(revs)
812 if first == nullrev:
810 if first == nullrev:
813 for i in self:
811 for i in self:
814 yield i
812 yield i
815 return
813 return
816
814
817 seen = set(revs)
815 seen = set(revs)
818 for i in self.revs(start=first + 1):
816 for i in self.revs(start=first + 1):
819 for x in self.parentrevs(i):
817 for x in self.parentrevs(i):
820 if x != nullrev and x in seen:
818 if x != nullrev and x in seen:
821 seen.add(i)
819 seen.add(i)
822 yield i
820 yield i
823 break
821 break
824
822
825 def findcommonmissing(self, common=None, heads=None):
823 def findcommonmissing(self, common=None, heads=None):
826 """Return a tuple of the ancestors of common and the ancestors of heads
824 """Return a tuple of the ancestors of common and the ancestors of heads
827 that are not ancestors of common. In revset terminology, we return the
825 that are not ancestors of common. In revset terminology, we return the
828 tuple:
826 tuple:
829
827
830 ::common, (::heads) - (::common)
828 ::common, (::heads) - (::common)
831
829
832 The list is sorted by revision number, meaning it is
830 The list is sorted by revision number, meaning it is
833 topologically sorted.
831 topologically sorted.
834
832
835 'heads' and 'common' are both lists of node IDs. If heads is
833 'heads' and 'common' are both lists of node IDs. If heads is
836 not supplied, uses all of the revlog's heads. If common is not
834 not supplied, uses all of the revlog's heads. If common is not
837 supplied, uses nullid."""
835 supplied, uses nullid."""
838 if common is None:
836 if common is None:
839 common = [nullid]
837 common = [nullid]
840 if heads is None:
838 if heads is None:
841 heads = self.heads()
839 heads = self.heads()
842
840
843 common = [self.rev(n) for n in common]
841 common = [self.rev(n) for n in common]
844 heads = [self.rev(n) for n in heads]
842 heads = [self.rev(n) for n in heads]
845
843
846 # we want the ancestors, but inclusive
844 # we want the ancestors, but inclusive
847 class lazyset(object):
845 class lazyset(object):
848 def __init__(self, lazyvalues):
846 def __init__(self, lazyvalues):
849 self.addedvalues = set()
847 self.addedvalues = set()
850 self.lazyvalues = lazyvalues
848 self.lazyvalues = lazyvalues
851
849
852 def __contains__(self, value):
850 def __contains__(self, value):
853 return value in self.addedvalues or value in self.lazyvalues
851 return value in self.addedvalues or value in self.lazyvalues
854
852
855 def __iter__(self):
853 def __iter__(self):
856 added = self.addedvalues
854 added = self.addedvalues
857 for r in added:
855 for r in added:
858 yield r
856 yield r
859 for r in self.lazyvalues:
857 for r in self.lazyvalues:
860 if not r in added:
858 if not r in added:
861 yield r
859 yield r
862
860
863 def add(self, value):
861 def add(self, value):
864 self.addedvalues.add(value)
862 self.addedvalues.add(value)
865
863
866 def update(self, values):
864 def update(self, values):
867 self.addedvalues.update(values)
865 self.addedvalues.update(values)
868
866
869 has = lazyset(self.ancestors(common))
867 has = lazyset(self.ancestors(common))
870 has.add(nullrev)
868 has.add(nullrev)
871 has.update(common)
869 has.update(common)
872
870
873 # take all ancestors from heads that aren't in has
871 # take all ancestors from heads that aren't in has
874 missing = set()
872 missing = set()
875 visit = collections.deque(r for r in heads if r not in has)
873 visit = collections.deque(r for r in heads if r not in has)
876 while visit:
874 while visit:
877 r = visit.popleft()
875 r = visit.popleft()
878 if r in missing:
876 if r in missing:
879 continue
877 continue
880 else:
878 else:
881 missing.add(r)
879 missing.add(r)
882 for p in self.parentrevs(r):
880 for p in self.parentrevs(r):
883 if p not in has:
881 if p not in has:
884 visit.append(p)
882 visit.append(p)
885 missing = list(missing)
883 missing = list(missing)
886 missing.sort()
884 missing.sort()
887 return has, [self.node(miss) for miss in missing]
885 return has, [self.node(miss) for miss in missing]
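A toy version of the set computation above, sketching "(::heads) - (::common)" over a made-up parentrevs function (rev -> (p1, p2), with nullrev meaning no parent); it mirrors the traversal, not the lazyset machinery:

def toymissing(parentrevs, common, heads):
    has = set(common)
    stack = list(common)
    while stack:                     # inclusive ancestors of common
        for p in parentrevs(stack.pop()):
            if p != nullrev and p not in has:
                has.add(p)
                stack.append(p)
    missing = set()
    visit = [r for r in heads if r not in has]
    while visit:                     # ancestors of heads outside 'has'
        r = visit.pop()
        if r in missing:
            continue
        missing.add(r)
        for p in parentrevs(r):
            if p != nullrev and p not in has:
                visit.append(p)
    return sorted(missing)

pr = lambda r: (r - 1 if r > 0 else nullrev, nullrev)  # linear history
assert toymissing(pr, common=[1], heads=[3]) == [2, 3]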
888
886
889 def incrementalmissingrevs(self, common=None):
887 def incrementalmissingrevs(self, common=None):
890 """Return an object that can be used to incrementally compute the
888 """Return an object that can be used to incrementally compute the
891 revision numbers of the ancestors of arbitrary sets that are not
889 revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

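These three methods are thin layers over one incremental-missing computation; only the vocabulary differs (revision numbers vs. node IDs). A hypothetical usage sketch, where 'rl' stands for some revlog instance and the revisions chosen are made up:

# Hypothetical sketch: 'rl' is some revlog-like object.
inc = rl.incrementalmissingrevs(common=[2])
missing = inc.missingancestors([7])            # revision numbers
assert missing == rl.findmissingrevs(common=[2], heads=[7])
# findmissing() is the same query phrased with node IDs:
assert ([rl.node(r) for r in missing] ==
        rl.findmissing(common=[rl.node(2)], heads=[rl.node(7)]))
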
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

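The shape of the returned tuple is easiest to see in use. A hedged sketch, with 'rl' and 'rootnode' both hypothetical:

# Hypothetical sketch: everything between one root and all heads.
nodes, outroots, outheads = rl.nodesbetween(roots=[rootnode])
assert set(outroots) <= set(nodes)   # reachable subset of 'roots'
assert set(outheads) <= set(nodes)   # reachable subset of the heads
pos = {n: i for i, n in enumerate(nodes)}
for n in nodes:                      # topological: parents come first
    for p in rl.parents(n):
        assert p not in pos or pos[p] < pos[n]
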
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not

        return [r for r, val in enumerate(ishead) if val]

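The pure-Python fallback leans on two facts: revision numbers are topologically sorted (a rev is visited before all of its children), and the ishead list is allocated one slot longer than the rev count so that a parent value of nullrev (-1) harmlessly writes into the spare last element. The same single-pass idea on a toy graph, as a standalone sketch rather than revlog API:

# Standalone sketch: parent revs for a 4-rev toy graph (-1 = nullrev).
parents = [(-1, -1), (0, -1), (0, -1), (1, 2)]
ishead = [0] * (len(parents) + 1)   # extra slot absorbs index -1
for r, (p1, p2) in enumerate(parents):
    ishead[r] = 1                   # assume head until a child shows up
    ishead[p1] = ishead[p2] = 0     # my parents cannot be heads
print([r for r, v in enumerate(ishead) if v])   # -> [3]
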
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

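The "a > b" early return works because revision numbers are assigned in topological order: an ancestor always carries a smaller number than its descendants, so a larger a can never be an ancestor of b. Only when the cheap tests are inconclusive does the common-ancestors machinery run. A hypothetical sketch of the decision ladder (rl and the rev numbers are made up):

# Hypothetical sketch mirroring the ladder above.
a, b = 4, 9
assert rl.isancestorrev(nullrev, b)   # nullrev precedes everything
assert rl.isancestorrev(b, b)         # a rev is its own ancestor
assert not rl.isancestorrev(b, a)     # 9 > 4: cannot be an ancestor
# Only a case like (4, 9) falls through to _commonancestorsheads().
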
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

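When several common-ancestor heads tie, min() over the binary nodes picks the lexicographically smallest one, so the answer is deterministic across runs; it does not prefer one candidate on graph-topology merit. A toy illustration with made-up node values:

# Toy illustration of the tie-break: smallest binary node wins.
candidates = [b'\x9f' + b'\x00' * 19, b'\x2a' + b'\x00' * 19]
print(min(candidates) == candidates[1])   # True: 0x2a... sorts first
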
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
-            except LookupError:
+            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
-            except (TypeError, LookupError):
+            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids; they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

-        raise LookupError(id, self.indexfile, _('no match found'))
+        raise error.LookupError(id, self.indexfile, _('no match found'))

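lookup() therefore resolves identifiers in a fixed priority: integer rev, 20-byte binary node, decimal string rev, full 40-character hex, and only then partial hex prefix, raising error.LookupError once everything misses. A hypothetical sketch (rl is made up; the string forms assume the codebase's Python 2 bytes semantics):

# Hypothetical sketch: one node reached through each spelling.
n = rl.lookup(0)                   # integer revision number
assert rl.lookup(n) == n           # 20-byte binary node
assert rl.lookup('0') == n         # str(rev)
assert rl.lookup(hex(n)) == n      # full 40-char hex
assert rl.lookup(hex(n)[:8]) == n  # prefix, assuming it is unambiguous
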
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
-                raise LookupError(node, self.indexfile, _('no node'))
+                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
-                    raise LookupError(node, self.indexfile, _('no node'))
+                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

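Putting the pieces together: the C index can hand back a candidate length directly, while the pure-Python path probes increasing prefix lengths with isvalid(); both finish by growing the prefix past any all-'f' string so it cannot be confused with the working-directory pseudo-node. A hedged sketch of the resulting invariants (rl and n hypothetical):

# Hypothetical sketch: shortest() output always round-trips.
prefix = rl.shortest(n, minlength=1)
assert rl.lookup(prefix) == n             # unambiguous by construction
assert not all(c == 'f' for c in prefix)  # never mistakable for wdir
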
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

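The two bitmask lines round the requested span outward to _chunkcachesize-aligned boundaries; since the size is a power of two, "& ~(cachesize - 1)" simply clears the low bits. A worked example with assumed numbers:

# Worked example of the rounding above, assuming a 64 KiB window.
cachesize = 65536                       # always a power of two
offset, length = 100000, 300
realoffset = offset & ~(cachesize - 1)  # 65536: rounded down to a boundary
reallength = (((offset + length + cachesize) & ~(cachesize - 1))
              - realoffset)             # 65536: reads [65536, 131072)
assert realoffset <= offset
assert realoffset + reallength >= offset + length
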
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

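For inline revlogs the index entries and revision data are interleaved in one file, so a revision's stored data offset must be shifted by the index entries that precede it: revisions 0 through startrev contribute startrev + 1 entries. A worked example with made-up sizes:

# Illustrative sketch of the inline adjustment, with made-up numbers.
iosize = 64                  # size of one index entry (self._io.size)
startrev, dataoff = 5, 1200  # data offset as recorded in the index
# Revs 0..startrev each put one index entry ahead of this rev's data,
# i.e. startrev + 1 entries in total:
fileoff = dataoff + (startrev + 1) * iosize
print(fileoff)               # 1200 + 6 * 64 = 1584
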
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

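In other words: a full text is a snapshot, a delta against a parent is a plain delta, and a delta against a non-parent counts as an intermediate snapshot only if its delta base is itself a snapshot. A standalone restatement on a toy graph (not the revlog API):

# Standalone sketch of the rule above on hypothetical toy data.
def toyissnapshot(deltaparent, parents, rev, nullrev=-1):
    if rev == nullrev:
        return True
    deltap = deltaparent[rev]
    if deltap == nullrev:
        return True           # full text: a level-0 snapshot
    if deltap in parents[rev]:
        return False          # plain delta against a parent
    return toyissnapshot(deltaparent, parents, deltap)

deltaparent = {0: -1, 5: 0, 6: 5}
parents = {0: (-1, -1), 5: (4, -1), 6: (5, -1)}
print(toyissnapshot(deltaparent, parents, 5))  # True: delta vs non-parent
print(toyissnapshot(deltaparent, parents, 6))  # False: delta vs parent 5
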
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

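The heavy lifting reduces to: fetch the delta chain (possibly stopping early at a cached revision), take the chain's base as the initial text, then fold every remaining delta into it; mdiff.patches does that folding in one batched call. A toy restatement of the loop, with a hypothetical patch() stand-in for the real binary-delta application:

# Toy restatement of the reconstruction step. 'chunks' holds the
# decompressed base text followed by the chain's deltas, oldest first;
# 'patch' is a hypothetical stand-in for applying one binary delta.
def rebuild(chunks, patch):
    text = chunks[0]            # full text at the base of the chain
    for delta in chunks[1:]:    # apply each delta in chain order
        text = patch(text, delta)
    return text
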
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

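As consumed above, each registered processor is a (readtransform, writetransform, rawtransform) triple: the first two take (revlog, text) and return (newtext, vhash), while the raw transform returns only the hash-validity flag. A made-up processor shape, purely illustrative; the reversal transforms are not a real Mercurial feature:

# Minimal sketch of a flag processor triple as unpacked above.
def _readreversed(rlog, text):
    return text[::-1], True     # (transformed text, validate hash?)

def _writereversed(rlog, text):
    return text[::-1], True

def _rawreversed(rlog, text):
    return False                # rawtext hash is not checkable here

reversedprocessor = (_readreversed, _writereversed, _rawreversed)
# Registered under some flag bit, _processflags would run
# _readreversed on 'read' and _writereversed on 'write'.
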
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

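    # Illustrative sketch (not part of the original module): a typical call,
    # assuming ``rl`` is a revlog and ``tr`` an active transaction. The node
    # defaults to self.hash(text, p1, p2) unless supplied, and re-adding
    # already-known content simply returns the existing node (see the nodemap
    # check above):
    #
    #   node = rl.addrevision(b'contents', tr, linkrev, p1node, p2node)
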
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

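    # Illustrative sketch (not part of the original module): ``compress``
    # returns a (header, data) pair, and the chunk stored on disk is their
    # concatenation. Per the branches above:
    #
    #   header, out = rl.compress(chunkdata)
    #   chunk = header + out
    #
    # ``header`` is ``'u'`` only for uncompressed text that does not already
    # start with ``'\0'``; compressed output carries its own header byte.
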
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

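    # Illustrative sketch (not part of the original module): compress() and
    # decompress() are inverses at the chunk level:
    #
    #   header, out = rl.compress(rawchunk)
    #   assert bytes(rl.decompress(header + out)) == rawchunk
    #
    # bytes() may be needed because the 'u' path returns a buffer view over
    # the input rather than a new bytes object.
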
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a default deltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

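    # Illustrative sketch (not part of the original module): the 8-tuple
    # appended to the index above has this layout:
    #
    #   (offset_type(offset, flags),  # data offset packed with the flag bits
    #    deltainfo.deltalen,          # on-disk length of the stored chunk
    #    textlen,                     # length of the full raw text
    #    deltainfo.base,              # rev the stored delta is based on
    #    link, p1r, p2r,              # linkrev and parent revision numbers
    #    node)                        # the 20-byte node id
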
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

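    # Illustrative sketch (not part of the original module): ``data`` here is
    # the (header, body) pair produced by compress(), so the two write paths
    # above yield these on-disk layouts:
    #
    #   inline:    .i = ... | entry | header | body | ...
    #   separate:  .i = ... | entry | ...      .d = ... | header | body | ...
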
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """
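        # Illustrative sketch (not part of the original module): each item in
        # ``deltas`` is unpacked below as a 7-tuple:
        #
        #   (node, p1, p2, linknode, deltabase, delta, flags)
        #
        # where ``delta`` is a binary patch against the revision identified
        # by ``deltabase``, and ``linkmapper(linknode)`` resolves the linkrev.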

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

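    # Illustrative sketch (not part of the original module): a bdiff hunk
    # header is three big-endian 32-bit ints (start, end, new length), so a
    # full-replacement delta over a base of ``oldlen`` bytes starts with:
    #
    #   header = struct.pack(">lll", 0, oldlen, newlen)
    #
    # which is what the mdiff.replacediffheader() comparison above checks.
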
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

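    # Illustrative sketch (not part of the original module): strip() below
    # consumes this as
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #
    # ``rev`` is the first revision to truncate away, and ``broken`` is the
    # set of revs whose linkrevs will be broken by truncating there.
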
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

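    # Illustrative sketch (not part of the original module): checksize()
    # returns (dd, di), where a nonzero ``dd`` means the data file has
    # unexpected extra (or, if negative, missing) bytes and a nonzero ``di``
    # means the same for the index file; a consistent revlog yields (0, 0).
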
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisiondeltas(self, requests):
        frev = self.rev

        prevrev = None
        for request in requests:
            node = request.node
            rev = frev(node)

            if prevrev is None:
                prevrev = self.index[rev][5]

            # Requesting a full revision.
            if request.basenode == nullid:
                baserev = nullrev
            # Requesting an explicit revision.
            elif request.basenode is not None:
                baserev = frev(request.basenode)
            # Allowing us to choose.
            else:
                p1rev, p2rev = self.parentrevs(rev)
                deltaparentrev = self.deltaparent(rev)

                # Avoid sending full revisions when delta parent is null. Pick
                # prev in that case. It's tempting to pick p1 in this case, as
                # p1 will be smaller in the common case. However, computing a
                # delta against p1 may require resolving the raw text of p1,
                # which could be expensive. The revlog caches should have prev
                # cached, meaning less CPU for delta generation. There is
                # likely room to add a flag and/or config option to control
                # this behavior.
                if deltaparentrev == nullrev and self._storedeltachains:
                    baserev = prevrev

                # Revlog is configured to use full snapshot for a reason.
                # Stick to full snapshot.
                elif deltaparentrev == nullrev:
                    baserev = nullrev

                # Pick previous when we can't be sure the base is available
                # on consumer.
                elif deltaparentrev not in (p1rev, p2rev, prevrev):
                    baserev = prevrev
                else:
                    baserev = deltaparentrev

                if baserev != nullrev and not self.candelta(baserev, rev):
                    baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if self.iscensored(baserev) or self.iscensored(rev):
                try:
                    revision = self.revision(node, raw=True)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    baserevisionsize = self.rawsize(baserev)

            elif baserev == nullrev:
                revision = self.revision(node, raw=True)
            else:
                delta = self.revdiff(baserev, rev)

            extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0

            yield revlogrevisiondelta(
                node=node,
                p1node=request.p1node,
                p2node=request.p2node,
                linknode=request.linknode,
                basenode=self.node(baserev),
                flags=self.flags(rev) | extraflags,
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added to the destination via addrevision(),
           recomputing deltas as if they were new.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When unset (None), the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
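
    # Illustrative sketch (not part of the original module): a typical
    # upgrade-style copy that recomputes every delta, assuming ``src`` and
    # ``dst`` are revlogs and ``tr`` an active transaction:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
    # ``dst`` must be empty and unfiltered, per the checks at the top of
    # clone().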