revlog: drop some more error aliases (API)...
Gregory Szorc
r39810:4a2466b2 default
@@ -1,2498 +1,2496 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

LookupError = error.LookupError
-AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
-CensoredNodeError = error.CensoredNodeError
-ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)
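# Illustration (editor's sketch, not part of this change): a round-trip
# through the metadata envelope handled by parsemeta()/packmeta() above.
# Metadata is stored as sorted "key: value" lines between two "\1\n"
# markers in front of the revision text; the offset returned by parsemeta()
# points at the first byte of the real text. The sample keys mirror the
# copy metadata filelogs store, but the values are made up.
def _examplemetaroundtrip():
    blob = packmeta({'copy': 'a.txt', 'copyrev': '0' * 40}, 'file content')
    meta, offset = parsemeta(blob)
    assert meta == {'copy': 'a.txt', 'copyrev': '0' * 40}
    assert blob[offset:] == 'file content'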
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
-        raise ProgrammingError(msg)
+        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
-        raise ProgrammingError(msg)
+        raise error.ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
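# Illustration (editor's sketch, not part of this change): what a flag
# processor registration looks like. The (read, write, raw) tuple below is
# an identity transform and REVIDX_EXTSTORED is used only as a stand-in
# flag; in practice the lfs extension registers its real processors this
# way. Note that registering twice on the same flag raises error.Abort.
def _exampleregisterprocessor():
    def _read(self, rawtext):
        return rawtext, True   # text == rawtext, usable for hash checks
    def _write(self, text):
        return text, True      # rawtext == text, usable for hash checks
    def _raw(self, rawtext):
        return True            # rawtext is valid for hash integrity checks
    addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))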
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
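# Illustration (editor's sketch, not part of this change): the first field
# of an index entry packs a 48-bit data-file offset and 16 bits of flags
# into a single integer, so the helpers above are plain shifts and masks.
# The numbers here are arbitrary.
def _exampleoffsettype():
    packed = offset_type(1048576, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1048576
    assert gettype(packed) == REVIDX_ISCENSORED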
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
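# Illustration (editor's sketch, not part of this change): a nodeid is the
# SHA-1 of the two parent nodeids (sorted first, so parent order does not
# matter) followed by the revision text. The parent values below are
# fabricated 20-byte ids.
def _examplehash():
    p1 = b'\x11' * 20
    p2 = b'\x22' * 20
    assert hash(b'text', p1, p2) == hash(b'text', p2, p1)
    # a root revision is hashed against two null parents
    assert hash(b'text', nullid, nullid) == hashlib.sha1(
        nullid + nullid + b'text').digest()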
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True, frozen=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    linknode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
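# Illustration (editor's note, not part of this change): each v0 index
# record is a fixed 76 bytes (four 4-byte integers plus three 20-byte
# nodeids), so entry N starts at byte N * indexformatv0.size.
def _examplev0entrysize():
    assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per entry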
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
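# Illustration (editor's note, not part of this change): a v1 ("ng") index
# entry is a fixed 64 bytes: an 8-byte offset+flags word, six 4-byte
# integers, a 20-byte nodeid, and 12 bytes of padding reserved for wider
# hashes. The 4-byte version header overlays the offset field of rev 0,
# which is why revlogio.packentry() below splices versionformat over the
# first four bytes of the first entry.
def _examplengentrysize():
    assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # 64 bytes per entry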
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
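    # Illustration (editor's sketch, not part of this change): a revlog is
    # normally reached through a repository object, but for inspection it
    # can be opened directly from a store vfs. The path below is
    # hypothetical:
    #
    #     from mercurial import vfs as vfsmod
    #     opener = vfsmod.vfs('/path/to/repo/.hg/store')
    #     rl = revlog(opener, 'data/foo.txt.i')
    #     rl.version & 0xFFFF    # format, REVLOGV1 on current repos
    #     len(rl)                # number of revisions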
    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
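    # Illustration (editor's note, not part of this change): reconstructing
    # a revision applies every delta in the chain, in order, on top of the
    # fulltext stored at the chain base. With general delta, e[3] is the
    # delta parent; otherwise deltas always stack on rev - 1. With made-up
    # revision numbers:
    #
    #     chain, stopped = rl._deltachain(5)
    #     # chain == [0, 2, 5]: rev 5 = fulltext(0) + delta(2) + delta(5)
    #     # stopped is False since the walk reached the chain base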
798 def ancestors(self, revs, stoprev=0, inclusive=False):
795 def ancestors(self, revs, stoprev=0, inclusive=False):
799 """Generate the ancestors of 'revs' in reverse topological order.
796 """Generate the ancestors of 'revs' in reverse topological order.
800 Does not generate revs lower than stoprev.
797 Does not generate revs lower than stoprev.
801
798
802 See the documentation for ancestor.lazyancestors for more details."""
799 See the documentation for ancestor.lazyancestors for more details."""
803
800
804 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
801 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
805 inclusive=inclusive)
802 inclusive=inclusive)
806
803
807 def descendants(self, revs):
804 def descendants(self, revs):
808 """Generate the descendants of 'revs' in revision order.
805 """Generate the descendants of 'revs' in revision order.
809
806
810 Yield a sequence of revision numbers starting with a child of
807 Yield a sequence of revision numbers starting with a child of
811 some rev in revs, i.e., each revision is *not* considered a
808 some rev in revs, i.e., each revision is *not* considered a
812 descendant of itself. Results are ordered by revision number (a
809 descendant of itself. Results are ordered by revision number (a
813 topological sort)."""
810 topological sort)."""
814 first = min(revs)
811 first = min(revs)
815 if first == nullrev:
812 if first == nullrev:
816 for i in self:
813 for i in self:
817 yield i
814 yield i
818 return
815 return
819
816
820 seen = set(revs)
817 seen = set(revs)
821 for i in self.revs(start=first + 1):
818 for i in self.revs(start=first + 1):
822 for x in self.parentrevs(i):
819 for x in self.parentrevs(i):
823 if x != nullrev and x in seen:
820 if x != nullrev and x in seen:
824 seen.add(i)
821 seen.add(i)
825 yield i
822 yield i
826 break
823 break
827
824
828 def findcommonmissing(self, common=None, heads=None):
825 def findcommonmissing(self, common=None, heads=None):
829 """Return a tuple of the ancestors of common and the ancestors of heads
826 """Return a tuple of the ancestors of common and the ancestors of heads
830 that are not ancestors of common. In revset terminology, we return the
827 that are not ancestors of common. In revset terminology, we return the
831 tuple:
828 tuple:
832
829
833 ::common, (::heads) - (::common)
830 ::common, (::heads) - (::common)
834
831
835 The list is sorted by revision number, meaning it is
832 The list is sorted by revision number, meaning it is
836 topologically sorted.
833 topologically sorted.
837
834
838 'heads' and 'common' are both lists of node IDs. If heads is
835 'heads' and 'common' are both lists of node IDs. If heads is
839 not supplied, uses all of the revlog's heads. If common is not
836 not supplied, uses all of the revlog's heads. If common is not
840 supplied, uses nullid."""
837 supplied, uses nullid."""
841 if common is None:
838 if common is None:
842 common = [nullid]
839 common = [nullid]
843 if heads is None:
840 if heads is None:
844 heads = self.heads()
841 heads = self.heads()
845
842
846 common = [self.rev(n) for n in common]
843 common = [self.rev(n) for n in common]
847 heads = [self.rev(n) for n in heads]
844 heads = [self.rev(n) for n in heads]
848
845
849 # we want the ancestors, but inclusive
846 # we want the ancestors, but inclusive
850 class lazyset(object):
847 class lazyset(object):
851 def __init__(self, lazyvalues):
848 def __init__(self, lazyvalues):
852 self.addedvalues = set()
849 self.addedvalues = set()
853 self.lazyvalues = lazyvalues
850 self.lazyvalues = lazyvalues
854
851
855 def __contains__(self, value):
852 def __contains__(self, value):
856 return value in self.addedvalues or value in self.lazyvalues
853 return value in self.addedvalues or value in self.lazyvalues
857
854
858 def __iter__(self):
855 def __iter__(self):
859 added = self.addedvalues
856 added = self.addedvalues
860 for r in added:
857 for r in added:
861 yield r
858 yield r
862 for r in self.lazyvalues:
859 for r in self.lazyvalues:
863 if not r in added:
860 if not r in added:
864 yield r
861 yield r
865
862
866 def add(self, value):
863 def add(self, value):
867 self.addedvalues.add(value)
864 self.addedvalues.add(value)
868
865
869 def update(self, values):
866 def update(self, values):
870 self.addedvalues.update(values)
867 self.addedvalues.update(values)
871
868
872 has = lazyset(self.ancestors(common))
869 has = lazyset(self.ancestors(common))
873 has.add(nullrev)
870 has.add(nullrev)
874 has.update(common)
871 has.update(common)
875
872
876 # take all ancestors from heads that aren't in has
873 # take all ancestors from heads that aren't in has
877 missing = set()
874 missing = set()
878 visit = collections.deque(r for r in heads if r not in has)
875 visit = collections.deque(r for r in heads if r not in has)
879 while visit:
876 while visit:
880 r = visit.popleft()
877 r = visit.popleft()
881 if r in missing:
878 if r in missing:
882 continue
879 continue
883 else:
880 else:
884 missing.add(r)
881 missing.add(r)
885 for p in self.parentrevs(r):
882 for p in self.parentrevs(r):
886 if p not in has:
883 if p not in has:
887 visit.append(p)
884 visit.append(p)
888 missing = list(missing)
885 missing = list(missing)
889 missing.sort()
886 missing.sort()
890 return has, [self.node(miss) for miss in missing]
887 return has, [self.node(miss) for miss in missing]
891
888
892 def incrementalmissingrevs(self, common=None):
889 def incrementalmissingrevs(self, common=None):
893 """Return an object that can be used to incrementally compute the
890 """Return an object that can be used to incrementally compute the
894 revision numbers of the ancestors of arbitrary sets that are not
891 revision numbers of the ancestors of arbitrary sets that are not
895 ancestors of common. This is an ancestor.incrementalmissingancestors
892 ancestors of common. This is an ancestor.incrementalmissingancestors
896 object.
893 object.
897
894
898 'common' is a list of revision numbers. If common is not supplied, uses
895 'common' is a list of revision numbers. If common is not supplied, uses
899 nullrev.
896 nullrev.
900 """
897 """
901 if common is None:
898 if common is None:
902 common = [nullrev]
899 common = [nullrev]
903
900
904 return ancestor.incrementalmissingancestors(self.parentrevs, common)
901 return ancestor.incrementalmissingancestors(self.parentrevs, common)
905
902
906 def findmissingrevs(self, common=None, heads=None):
903 def findmissingrevs(self, common=None, heads=None):
907 """Return the revision numbers of the ancestors of heads that
904 """Return the revision numbers of the ancestors of heads that
908 are not ancestors of common.
905 are not ancestors of common.
909
906
910 More specifically, return a list of revision numbers corresponding to
907 More specifically, return a list of revision numbers corresponding to
911 nodes N such that every N satisfies the following constraints:
908 nodes N such that every N satisfies the following constraints:
912
909
913 1. N is an ancestor of some node in 'heads'
910 1. N is an ancestor of some node in 'heads'
914 2. N is not an ancestor of any node in 'common'
911 2. N is not an ancestor of any node in 'common'
915
912
916 The list is sorted by revision number, meaning it is
913 The list is sorted by revision number, meaning it is
917 topologically sorted.
914 topologically sorted.
918
915
919 'heads' and 'common' are both lists of revision numbers. If heads is
916 'heads' and 'common' are both lists of revision numbers. If heads is
920 not supplied, uses all of the revlog's heads. If common is not
917 not supplied, uses all of the revlog's heads. If common is not
921 supplied, uses nullid."""
918 supplied, uses nullid."""
922 if common is None:
919 if common is None:
923 common = [nullrev]
920 common = [nullrev]
924 if heads is None:
921 if heads is None:
925 heads = self.headrevs()
922 heads = self.headrevs()
926
923
927 inc = self.incrementalmissingrevs(common=common)
924 inc = self.incrementalmissingrevs(common=common)
928 return inc.missingancestors(heads)
925 return inc.missingancestors(heads)
929
926
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

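    # Illustrative sketch (not part of the module): findmissingrevs() and
    # findmissing() differ only in vocabulary. For a hypothetical revlog
    # `rl` with binary nodes `c` and `h`:
    #
    #   rl.findmissingrevs(common=[rl.rev(c)], heads=[rl.rev(h)])
    #
    # returns integer revision numbers, while
    #
    #   rl.findmissing(common=[c], heads=[h])
    #
    # does the node-to-rev mapping itself and returns binary node IDs.
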
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

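    # Illustrative sketch (not part of the module): for a hypothetical
    # linear revlog `rl` with nodes a -> b -> c,
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[c])
    #
    # would yield nodes == [a, b, c], outroots == [a] and outheads == [c],
    # since every node counts as both its own ancestor and descendant.
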
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nothing is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # r may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # but its parents are not
        return [r for r, val in enumerate(ishead) if val]

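    # Illustrative sketch (not part of the module): _headrevs() first marks
    # every revision as a candidate head, then clears the mark on anything
    # named as a parent. For a hypothetical graph 0 -> 1 -> 2 with an extra
    # branch 1 -> 3, the parent entries of revs 1, 2 and 3 clear revs 0 and
    # 1, leaving [2, 3] as the heads.
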
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

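    # Illustrative note (not part of the module): the `a > b` shortcut in
    # isancestorrev() is safe because revision numbers are a topological
    # order -- an ancestor always has a smaller number than its descendants.
    # So for a hypothetical revlog `rl`, `rl.isancestorrev(5, 3)` is False
    # without any graph walk.
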
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

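    # Illustrative sketch (not part of the module): lookup() accepts several
    # spellings of the same hypothetical revision, e.g.:
    #
    #   rl.lookup(0)               # revision number
    #   rl.lookup('0')             # str(revision number)
    #   rl.lookup(node)            # 20-byte binary node
    #   rl.lookup(hex(node))       # 40-digit hex node
    #   rl.lookup(hex(node)[:6])   # unambiguous hex prefix
    #
    # each resolves to the same binary node, or raises LookupError.
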
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

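    # Worked example (not part of the module): with a hypothetical
    # _chunkcachesize of 65536, a request for offset=70000, length=1000 is
    # widened to realoffset = 70000 & ~65535 = 65536 and reallength =
    # ((70000 + 1000 + 65536) & ~65535) - 65536 = 65536, i.e. one aligned
    # 64k window covering the requested bytes plus their neighborhood.
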
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

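    # Worked example (not part of the module): in an inline revlog, index
    # entries of self._io.size bytes are interleaved with the revision data,
    # so revision r's data starts self.start(r) + (r + 1) * self._io.size
    # bytes into the file. For a hypothetical 64-byte entry size, revision 0
    # with start == 0 has its data at byte 64, right after its index entry.
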
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

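    # Illustrative sketch (not part of the module): with generaldelta, the
    # base stored in the index entry is the delta parent itself; in the
    # legacy layout, deltas always chain against the previous on-disk
    # revision. For a hypothetical rev 7 whose entry stores base 5:
    #
    #   generaldelta:   deltaparent(7) == 5
    #   legacy layout:  deltaparent(7) == 6
    #
    # and base == rev means the revision is stored as a full text, so
    # deltaparent() returns nullrev.
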
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

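    # Illustrative sketch (not part of the module): a snapshot is a revision
    # whose delta chain never passes through one of its own parents. If a
    # hypothetical rev X deltas directly against nullrev it is a full
    # snapshot; if it deltas against its p1 or p2 it is not a snapshot; if
    # it deltas against some unrelated base B, the answer is inherited from
    # issnapshot(B) by following the chain recursively.
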
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

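    # Illustrative sketch (not part of the module): for a hypothetical
    # revlog `rl`, reading by node or by revision number is equivalent:
    #
    #   text = rl.revision(node)            # flag processors run, hash checked
    #   same = rl.revision(rl.rev(node))
    #   raw = rl.revision(node, raw=True)   # stored rawtext, transforms skipped
    #
    # raw=True is what changegroup generation and debug commands use, since
    # they need the on-disk bytes rather than the processed text.
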
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else:  # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

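    # Illustrative sketch (not part of the module; the processor names are
    # hypothetical): a flag processor is a (read, write, raw) triple of
    # callables, where read/write return (newtext, validatehash) and raw
    # returns only the validatehash bool, e.g.:
    #
    #   def _readext(rl, text):
    #       return fetchstored(text), False   # pointer data, skip hash check
    #   def _writeext(rl, text):
    #       return makepointer(text), False
    #   def _rawext(rl, text):
    #       return False
    #
    # and would be registered against a flag such as REVIDX_EXTSTORED so
    # that _processflags() can dispatch to it.
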
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

1804 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1802 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1805 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1803 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1806 """add a revision to the log
1804 """add a revision to the log
1807
1805
1808 text - the revision data to add
1806 text - the revision data to add
1809 transaction - the transaction object used for rollback
1807 transaction - the transaction object used for rollback
1810 link - the linkrev data to add
1808 link - the linkrev data to add
1811 p1, p2 - the parent nodeids of the revision
1809 p1, p2 - the parent nodeids of the revision
1812 cachedelta - an optional precomputed delta
1810 cachedelta - an optional precomputed delta
1813 node - nodeid of revision; typically node is not specified, and it is
1811 node - nodeid of revision; typically node is not specified, and it is
1814 computed by default as hash(text, p1, p2), however subclasses might
1812 computed by default as hash(text, p1, p2), however subclasses might
1815 use different hashing method (and override checkhash() in such case)
1813 use different hashing method (and override checkhash() in such case)
1816 flags - the known flags to set on the revision
1814 flags - the known flags to set on the revision
1817 deltacomputer - an optional deltacomputer instance shared between
1815 deltacomputer - an optional deltacomputer instance shared between
1818 multiple calls
1816 multiple calls
1819 """
1817 """
1820 if link == nullrev:
1818 if link == nullrev:
1821 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1819 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1822 % self.indexfile)
1820 % self.indexfile)
1823
1821
1824 if flags:
1822 if flags:
1825 node = node or self.hash(text, p1, p2)
1823 node = node or self.hash(text, p1, p2)
1826
1824
1827 rawtext, validatehash = self._processflags(text, flags, 'write')
1825 rawtext, validatehash = self._processflags(text, flags, 'write')
1828
1826
1829 # If the flag processor modifies the revision data, ignore any provided
1827 # If the flag processor modifies the revision data, ignore any provided
1830 # cachedelta.
1828 # cachedelta.
1831 if rawtext != text:
1829 if rawtext != text:
1832 cachedelta = None
1830 cachedelta = None
1833
1831
1834 if len(rawtext) > _maxentrysize:
1832 if len(rawtext) > _maxentrysize:
1835 raise error.RevlogError(
1833 raise error.RevlogError(
1836 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1834 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1837 % (self.indexfile, len(rawtext)))
1835 % (self.indexfile, len(rawtext)))
1838
1836
1839 node = node or self.hash(rawtext, p1, p2)
1837 node = node or self.hash(rawtext, p1, p2)
1840 if node in self.nodemap:
1838 if node in self.nodemap:
1841 return node
1839 return node
1842
1840
1843 if validatehash:
1841 if validatehash:
1844 self.checkhash(rawtext, node, p1=p1, p2=p2)
1842 self.checkhash(rawtext, node, p1=p1, p2=p2)
1845
1843
1846 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1844 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1847 flags, cachedelta=cachedelta,
1845 flags, cachedelta=cachedelta,
1848 deltacomputer=deltacomputer)
1846 deltacomputer=deltacomputer)
1849
1847
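# [editor's note: illustrative sketch, not part of this change. Assuming a
# filelog-like revlog `rl`, an open transaction `tr`, and a changelog rev
# `linkrev` for the change being committed, a caller adds a revision with:
#
#     node = rl.addrevision(b'new file contents', tr, linkrev,
#                           p1=rl.node(len(rl) - 1), p2=nullid)
#
# The returned node is hash(text, p1, p2) unless `node` was passed in.]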
1848 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1849 cachedelta=None, deltacomputer=None):
1850 """add a raw revision with known flags, node and parents
1851 useful when reusing a revision not stored in this revlog (e.g. received
1852 over the wire, or read from an external bundle).
1853 """
1854 dfh = None
1855 if not self._inline:
1856 dfh = self._datafp("a+")
1857 ifh = self._indexfp("a+")
1858 try:
1859 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1860 flags, cachedelta, ifh, dfh,
1861 deltacomputer=deltacomputer)
1862 finally:
1863 if dfh:
1864 dfh.close()
1865 ifh.close()
1866
1867 def compress(self, data):
1868 """Generate a possibly-compressed representation of data."""
1869 if not data:
1870 return '', data
1871
1872 compressed = self._compressor.compress(data)
1873
1874 if compressed:
1875 # The revlog compressor added the header in the returned data.
1876 return '', compressed
1877
1878 if data[0:1] == '\0':
1879 return '', data
1880 return 'u', data
1881
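# [editor's note: illustrative sketch, not part of this change. compress()
# returns a (header, data) pair: '' when the payload already identifies
# itself (a compression-engine header or a leading '\0'), and 'u' when the
# text is stored uncompressed. A hypothetical round trip through a revlog
# instance `rl`:
#
#     header, packed = rl.compress(b'x' * 4096)
#     assert rl.decompress(header + packed) == b'x' * 4096
# ]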
1882 def decompress(self, data):
1883 """Decompress a revlog chunk.
1884
1885 The chunk is expected to begin with a header identifying the
1886 format type so it can be routed to an appropriate decompressor.
1887 """
1888 if not data:
1889 return data
1890
1891 # Revlogs are read much more frequently than they are written and many
1892 # chunks only take microseconds to decompress, so performance is
1893 # important here.
1894 #
1895 # We can make a few assumptions about revlogs:
1896 #
1897 # 1) the majority of chunks will be compressed (as opposed to inline
1898 # raw data).
1899 # 2) decompressing *any* data will likely be at least 10x slower than
1900 # returning raw inline data.
1901 # 3) we want to prioritize common and officially supported compression
1902 # engines
1903 #
1904 # It follows that we want to optimize for "decompress compressed data
1905 # when encoded with common and officially supported compression engines"
1906 # case over "raw data" and "data encoded by less common or non-official
1907 # compression engines." That is why we have the inline lookup first
1908 # followed by the compengines lookup.
1909 #
1910 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1911 # compressed chunks. And this matters for changelog and manifest reads.
1912 t = data[0:1]
1913
1914 if t == 'x':
1915 try:
1916 return _zlibdecompress(data)
1917 except zlib.error as e:
1918 raise error.RevlogError(_('revlog decompress error: %s') %
1919 stringutil.forcebytestr(e))
1920 # '\0' is more common than 'u' so it goes first.
1921 elif t == '\0':
1922 return data
1923 elif t == 'u':
1924 return util.buffer(data, 1)
1925
1926 try:
1927 compressor = self._decompressors[t]
1928 except KeyError:
1929 try:
1930 engine = util.compengines.forrevlogheader(t)
1931 compressor = engine.revlogcompressor()
1932 self._decompressors[t] = compressor
1933 except KeyError:
1934 raise error.RevlogError(_('unknown compression type %r') % t)
1935
1936 return compressor.decompress(data)
1937
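# [editor's note: summary of the header dispatch above, for reference:
#     'x'  -> zlib stream (zlib's own first byte)
#     '\0' -> raw chunk, returned unchanged
#     'u'  -> uncompressed text, 'u' stripped via util.buffer(data, 1)
#     else -> resolved through util.compengines.forrevlogheader(t)
# ]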
1938 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1939 cachedelta, ifh, dfh, alwayscache=False,
1940 deltacomputer=None):
1941 """internal function to add revisions to the log
1942
1943 see addrevision for argument descriptions.
1944
1945 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1946
1947 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1948 be used.
1949
1950 invariants:
1951 - rawtext is optional (can be None); if not set, cachedelta must be set.
1952 if both are set, they must correspond to each other.
1953 """
1954 if node == nullid:
1955 raise error.RevlogError(_("%s: attempt to add null revision") %
1956 self.indexfile)
1957 if node == wdirid or node in wdirfilenodeids:
1958 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1959 self.indexfile)
1960
1961 if self._inline:
1962 fh = ifh
1963 else:
1964 fh = dfh
1965
1966 btext = [rawtext]
1967
1968 curr = len(self)
1969 prev = curr - 1
1970 offset = self.end(prev)
1971 p1r, p2r = self.rev(p1), self.rev(p2)
1972
1973 # full versions are inserted when the needed deltas
1974 # become comparable to the uncompressed text
1975 if rawtext is None:
1976 # need rawtext size, before changed by flag processors, which is
1977 # the non-raw size. use revlog explicitly to avoid filelog's extra
1978 # logic that might remove metadata size.
1979 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1980 cachedelta[1])
1981 else:
1982 textlen = len(rawtext)
1983
1984 if deltacomputer is None:
1985 deltacomputer = deltautil.deltacomputer(self)
1986
1987 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1988
1989 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1990
1991 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1992 deltainfo.base, link, p1r, p2r, node)
1993 self.index.append(e)
1994 self.nodemap[node] = curr
1995
1996 entry = self._io.packentry(e, self.node, self.version, curr)
1997 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1998 link, offset)
1999
2000 rawtext = btext[0]
2001
2002 if alwayscache and rawtext is None:
2003 rawtext = deltacomputer.buildtext(revinfo, fh)
2004
2005 if type(rawtext) == bytes: # only accept immutable objects
2006 self._cache = (node, curr, rawtext)
2007 self._chainbasecache[curr] = deltainfo.chainbase
2008 return node
2009
2010 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2011 # Files opened in a+ mode have inconsistent behavior on various
2012 # platforms. Windows requires that a file positioning call be made
2013 # when the file handle transitions between reads and writes. See
2014 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2015 # platforms, Python or the platform itself can be buggy. Some versions
2016 # of Solaris have been observed to not append at the end of the file
2017 # if the file was seeked to before the end. See issue4943 for more.
2018 #
2019 # We work around this issue by inserting a seek() before writing.
2020 # Note: This is likely not necessary on Python 3.
2021 ifh.seek(0, os.SEEK_END)
2022 if dfh:
2023 dfh.seek(0, os.SEEK_END)
2024
2025 curr = len(self) - 1
2026 if not self._inline:
2027 transaction.add(self.datafile, offset)
2028 transaction.add(self.indexfile, curr * len(entry))
2029 if data[0]:
2030 dfh.write(data[0])
2031 dfh.write(data[1])
2032 ifh.write(entry)
2033 else:
2034 offset += curr * self._io.size
2035 transaction.add(self.indexfile, offset, curr)
2036 ifh.write(entry)
2037 ifh.write(data[0])
2038 ifh.write(data[1])
2039 self._enforceinlinesize(transaction, ifh)
2040
2041 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2042 """
2043 add a delta group
2044
2045 given a set of deltas, add them to the revision log. the
2046 first delta is against its parent, which should be in our
2047 log, the rest are against the previous delta.
2048
2049 If ``addrevisioncb`` is defined, it will be called with arguments of
2050 this revlog and the node that was added.
2051 """
2052
2053 nodes = []
2054
2055 r = len(self)
2056 end = 0
2057 if r:
2058 end = self.end(r - 1)
2059 ifh = self._indexfp("a+")
2060 isize = r * self._io.size
2061 if self._inline:
2062 transaction.add(self.indexfile, end + isize, r)
2063 dfh = None
2064 else:
2065 transaction.add(self.indexfile, isize, r)
2066 transaction.add(self.datafile, end)
2067 dfh = self._datafp("a+")
2068 def flush():
2069 if dfh:
2070 dfh.flush()
2071 ifh.flush()
2072 try:
2073 deltacomputer = deltautil.deltacomputer(self)
2074 # loop through our set of deltas
2075 for data in deltas:
2076 node, p1, p2, linknode, deltabase, delta, flags = data
2077 link = linkmapper(linknode)
2078 flags = flags or REVIDX_DEFAULT_FLAGS
2079
2080 nodes.append(node)
2081
2082 if node in self.nodemap:
2083 # this can happen if two branches make the same change
2084 continue
2085
2086 for p in (p1, p2):
2087 if p not in self.nodemap:
2088 raise LookupError(p, self.indexfile,
2089 _('unknown parent'))
2090
2091 if deltabase not in self.nodemap:
2092 raise LookupError(deltabase, self.indexfile,
2093 _('unknown delta base'))
2094
2095 baserev = self.rev(deltabase)
2096
2097 if baserev != nullrev and self.iscensored(baserev):
2098 # if base is censored, delta must be full replacement in a
2099 # single patch operation
2100 hlen = struct.calcsize(">lll")
2101 oldlen = self.rawsize(baserev)
2102 newlen = len(delta) - hlen
2103 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2104 raise error.CensoredBaseError(self.indexfile,
2105 self.node(baserev))
2106
2107 if not flags and self._peek_iscensored(baserev, delta, flush):
2108 flags |= REVIDX_ISCENSORED
2109
2110 # We assume consumers of addrevisioncb will want to retrieve
2111 # the added revision, which will require a call to
2112 # revision(). revision() will fast path if there is a cache
2113 # hit. So, we tell _addrevision() to always cache in this case.
2114 # We're only using addgroup() in the context of changegroup
2115 # generation so the revision data can always be handled as raw
2116 # by the flagprocessor.
2117 self._addrevision(node, None, transaction, link,
2118 p1, p2, flags, (baserev, delta),
2119 ifh, dfh,
2120 alwayscache=bool(addrevisioncb),
2121 deltacomputer=deltacomputer)
2122
2123 if addrevisioncb:
2124 addrevisioncb(self, node)
2125
2126 if not dfh and not self._inline:
2127 # addrevision switched from inline to conventional
2128 # reopen the index
2129 ifh.close()
2130 dfh = self._datafp("a+")
2131 ifh = self._indexfp("a+")
2132 finally:
2133 if dfh:
2134 dfh.close()
2135 ifh.close()
2136
2137 return nodes
2138
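# [editor's note: illustrative sketch, not part of this change. Each element
# consumed by addgroup() is a 7-tuple, typically unpacked from a changegroup:
#
#     (node, p1, p2, linknode, deltabase, delta, flags)
#
# `delta` is a binary patch against `deltabase`, and `linkmapper(linknode)`
# yields the local changelog revision to record as the linkrev.]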
2139 def iscensored(self, rev):
2140 """Check if a file revision is censored."""
2141 if not self._censorable:
2142 return False
2143
2144 return self.flags(rev) & REVIDX_ISCENSORED
2145
2146 def _peek_iscensored(self, baserev, delta, flush):
2147 """Quickly check if a delta produces a censored revision."""
2148 if not self._censorable:
2149 return False
2150
2151 # Fragile heuristic: unless new file meta keys are added alphabetically
2152 # preceding "censored", all censored revisions are prefixed by
2153 # "\1\ncensored:". A delta producing such a censored revision must be a
2154 # full-replacement delta, so we inspect the first and only patch in the
2155 # delta for this prefix.
2156 hlen = struct.calcsize(">lll")
2157 if len(delta) <= hlen:
2158 return False
2159
2160 oldlen = self.rawsize(baserev)
2161 newlen = len(delta) - hlen
2162 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2163 return False
2164
2165 add = "\1\ncensored:"
2166 addlen = len(add)
2167 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2168
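# [editor's note: illustrative sketch. A binary delta is a series of patch
# hunks, each a ">lll"-packed (start, end, newlength) header followed by the
# replacement bytes. A full-replacement delta is a single hunk:
#
#     struct.pack(">lll", 0, oldlen, newlen) + newtext
#
# which is what mdiff.replacediffheader(oldlen, newlen) produces and what
# _peek_iscensored() matches against above.]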
2169 def getstrippoint(self, minlink):
2170 """find the minimum rev that must be stripped to strip the linkrev
2171
2172 Returns a tuple containing the minimum rev and a set of all revs that
2173 have linkrevs that will be broken by this strip.
2174 """
2175 brokenrevs = set()
2176 strippoint = len(self)
2177
2178 heads = {}
2179 futurelargelinkrevs = set()
2180 for head in self.headrevs():
2181 headlinkrev = self.linkrev(head)
2182 heads[head] = headlinkrev
2183 if headlinkrev >= minlink:
2184 futurelargelinkrevs.add(headlinkrev)
2185
2186 # This algorithm involves walking down the rev graph, starting at the
2187 # heads. Since the revs are topologically sorted according to linkrev,
2188 # once all head linkrevs are below the minlink, we know there are
2189 # no more revs that could have a linkrev greater than minlink.
2190 # So we can stop walking.
2191 while futurelargelinkrevs:
2192 strippoint -= 1
2193 linkrev = heads.pop(strippoint)
2194
2195 if linkrev < minlink:
2196 brokenrevs.add(strippoint)
2197 else:
2198 futurelargelinkrevs.remove(linkrev)
2199
2200 for p in self.parentrevs(strippoint):
2201 if p != nullrev:
2202 plinkrev = self.linkrev(p)
2203 heads[p] = plinkrev
2204 if plinkrev >= minlink:
2205 futurelargelinkrevs.add(plinkrev)
2206
2207 return strippoint, brokenrevs
2208
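# [editor's note: hypothetical worked example. Consider a revlog with two
# head revisions 2 and 3 on separate branches, with linkrevs
# {0: 0, 1: 1, 2: 5, 3: 4}. getstrippoint(5) returns (2, {3}): the revlog
# must be truncated at rev 2, and rev 3 (linkrev 4 < 5) is only removed
# because it sits above the truncation point -- it is reported as broken so
# the caller can save and re-add it.]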
2209 def strip(self, minlink, transaction):
2210 """truncate the revlog on the first revision with a linkrev >= minlink
2211
2212 This function is called when we're stripping revision minlink and
2213 its descendants from the repository.
2214
2215 We have to remove all revisions with linkrev >= minlink, because
2216 the equivalent changelog revisions will be renumbered after the
2217 strip.
2218
2219 So we truncate the revlog on the first of these revisions, and
2220 trust that the caller has saved the revisions that shouldn't be
2221 removed and that it'll re-add them after this truncation.
2222 """
2223 if len(self) == 0:
2224 return
2225
2226 rev, _ = self.getstrippoint(minlink)
2227 if rev == len(self):
2228 return
2229
2230 # first truncate the files on disk
2231 end = self.start(rev)
2232 if not self._inline:
2233 transaction.add(self.datafile, end)
2234 end = rev * self._io.size
2235 else:
2236 end += rev * self._io.size
2237
2238 transaction.add(self.indexfile, end)
2239
2240 # then reset internal state in memory to forget those revisions
2241 self._cache = None
2242 self._chaininfocache = {}
2243 self._chunkclear()
2244 for x in pycompat.xrange(rev, len(self)):
2245 del self.nodemap[self.node(x)]
2246
2247 del self.index[rev:-1]
2248 self._nodepos = None
2249
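# [editor's note: illustrative arithmetic for the truncation above. For a
# non-inline revlog the data file is cut at self.start(rev) and the index at
# rev * entrysize; for an inline revlog index entries and data chunks share
# one file, so the cut point is self.start(rev) + rev * entrysize.]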
2250 def checksize(self):
2251 expected = 0
2252 if len(self):
2253 expected = max(0, self.end(len(self) - 1))
2254
2255 try:
2256 with self._datafp() as f:
2257 f.seek(0, 2)
2258 actual = f.tell()
2259 dd = actual - expected
2260 except IOError as inst:
2261 if inst.errno != errno.ENOENT:
2262 raise
2263 dd = 0
2264
2265 try:
2266 f = self.opener(self.indexfile)
2267 f.seek(0, 2)
2268 actual = f.tell()
2269 f.close()
2270 s = self._io.size
2271 i = max(0, actual // s)
2272 di = actual - (i * s)
2273 if self._inline:
2274 databytes = 0
2275 for r in self:
2276 databytes += max(0, self.length(r))
2277 dd = 0
2278 di = actual - len(self) * s - databytes
2279 except IOError as inst:
2280 if inst.errno != errno.ENOENT:
2281 raise
2282 di = 0
2283
2284 return (dd, di)
2285
2286 def files(self):
2287 res = [self.indexfile]
2288 if not self._inline:
2289 res.append(self.datafile)
2290 return res
2291
2292 def emitrevisiondeltas(self, requests):
2293 frev = self.rev
2294
2295 prevrev = None
2296 for request in requests:
2297 node = request.node
2298 rev = frev(node)
2299
2300 if prevrev is None:
2301 prevrev = self.index[rev][5]
2302
2303 # Requesting a full revision.
2304 if request.basenode == nullid:
2305 baserev = nullrev
2306 # Requesting an explicit revision.
2307 elif request.basenode is not None:
2308 baserev = frev(request.basenode)
2309 # Allowing us to choose.
2310 else:
2311 p1rev, p2rev = self.parentrevs(rev)
2312 deltaparentrev = self.deltaparent(rev)
2313
2314 # Avoid sending full revisions when delta parent is null. Pick
2315 # prev in that case. It's tempting to pick p1 in this case, as
2316 # p1 will be smaller in the common case. However, computing a
2317 # delta against p1 may require resolving the raw text of p1,
2318 # which could be expensive. The revlog caches should have prev
2319 # cached, meaning less CPU for delta generation. There is
2320 # likely room to add a flag and/or config option to control this
2321 # behavior.
2322 if deltaparentrev == nullrev and self._storedeltachains:
2323 baserev = prevrev
2324
2325 # Revlog is configured to use full snapshot for a reason.
2326 # Stick to full snapshot.
2327 elif deltaparentrev == nullrev:
2328 baserev = nullrev
2329
2330 # Pick previous when we can't be sure the base is available
2331 # on consumer.
2332 elif deltaparentrev not in (p1rev, p2rev, prevrev):
2333 baserev = prevrev
2334 else:
2335 baserev = deltaparentrev
2336
2337 if baserev != nullrev and not self.candelta(baserev, rev):
2338 baserev = nullrev
2339
2340 revision = None
2341 delta = None
2342 baserevisionsize = None
2343
2344 if self.iscensored(baserev) or self.iscensored(rev):
2345 try:
2346 revision = self.revision(node, raw=True)
2347 except error.CensoredNodeError as e:
2348 revision = e.tombstone
2349
2350 if baserev != nullrev:
2351 baserevisionsize = self.rawsize(baserev)
2352
2353 elif baserev == nullrev:
2354 revision = self.revision(node, raw=True)
2355 else:
2356 delta = self.revdiff(baserev, rev)
2357
2358 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
2359
2360 yield revlogrevisiondelta(
2361 node=node,
2362 p1node=request.p1node,
2363 p2node=request.p2node,
2364 linknode=request.linknode,
2365 basenode=self.node(baserev),
2366 flags=self.flags(rev) | extraflags,
2367 baserevisionsize=baserevisionsize,
2368 revision=revision,
2369 delta=delta)
2370
2371 prevrev = rev
2372
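# [editor's note: summary of the base selection above when a request leaves
# basenode unspecified:
#     1. stored delta parent is null, chains stored -> delta against prev
#     2. stored delta parent is null otherwise      -> full snapshot
#     3. stored base not in (p1, p2, prev)          -> delta against prev
#     4. otherwise                                  -> reuse stored delta parent
# with a final candelta() check that falls back to a full snapshot.]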
2373 DELTAREUSEALWAYS = 'always'
2374 DELTAREUSESAMEREVS = 'samerevs'
2375 DELTAREUSENEVER = 'never'
2376
2377 DELTAREUSEFULLADD = 'fulladd'
2378
2379 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2380
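# [editor's note: illustrative sketch, not part of this change. A
# hypothetical format upgrade could copy a revlog while recomputing every
# delta:
#
#     srclog.clone(tr, dstlog, deltareuse=srclog.DELTAREUSENEVER)
#
# where `tr` is an open transaction and `dstlog` an empty destination revlog.]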
2381 def clone(self, tr, destrevlog, addrevisioncb=None,
2382 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2383 """Copy this revlog to another, possibly with format changes.
2384
2385 The destination revlog will contain the same revisions and nodes.
2386 However, it may not be bit-for-bit identical due to e.g. delta encoding
2387 differences.
2388
2389 The ``deltareuse`` argument controls how deltas from the existing revlog
2390 are preserved in the destination revlog. The argument can have the
2391 following values:
2392
2393 DELTAREUSEALWAYS
2394 Deltas will always be reused (if possible), even if the destination
2395 revlog would not select the same revisions for the delta. This is the
2396 fastest mode of operation.
2397 DELTAREUSESAMEREVS
2398 Deltas will be reused if the destination revlog would pick the same
2399 revisions for the delta. This mode strikes a balance between speed
2400 and optimization.
2401 DELTAREUSENEVER
2402 Deltas will never be reused. This is the slowest mode of execution.
2403 This mode can be used to recompute deltas (e.g. if the diff/delta
2404 algorithm changes).
2405
2406 Delta computation can be slow, so the choice of delta reuse policy can
2407 significantly affect run time.
2408
2409 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2410 two extremes. Deltas will be reused if they are appropriate. But if the
2411 delta could choose a better revision, it will do so. This means if you
2412 are converting a non-generaldelta revlog to a generaldelta revlog,
2413 deltas will be recomputed if the delta's parent isn't a parent of the
2414 revision.
2415
2416 In addition to the delta policy, the ``deltabothparents`` argument
2417 controls whether to compute deltas against both parents for merges.
2418 If None, the destination revlog's current setting is left unchanged.
2419 """
2420 if deltareuse not in self.DELTAREUSEALL:
2421 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2422
2423 if len(destrevlog):
2424 raise ValueError(_('destination revlog is not empty'))
2425
2426 if getattr(self, 'filteredrevs', None):
2427 raise ValueError(_('source revlog has filtered revisions'))
2428 if getattr(destrevlog, 'filteredrevs', None):
2429 raise ValueError(_('destination revlog has filtered revisions'))
2430
2431 # lazydeltabase controls whether to reuse a cached delta, if possible.
2432 oldlazydeltabase = destrevlog._lazydeltabase
2433 oldamd = destrevlog._deltabothparents
2434
2435 try:
2436 if deltareuse == self.DELTAREUSEALWAYS:
2437 destrevlog._lazydeltabase = True
2438 elif deltareuse == self.DELTAREUSESAMEREVS:
2439 destrevlog._lazydeltabase = False
2440
2441 destrevlog._deltabothparents = deltabothparents or oldamd
2442
2443 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2444 self.DELTAREUSESAMEREVS)
2445
2446 deltacomputer = deltautil.deltacomputer(destrevlog)
2447 index = self.index
2448 for rev in self:
2449 entry = index[rev]
2450
2451 # Some classes override linkrev to take filtered revs into
2452 # account. Use raw entry from index.
2453 flags = entry[0] & 0xffff
2454 linkrev = entry[4]
2455 p1 = index[entry[5]][7]
2456 p2 = index[entry[6]][7]
2457 node = entry[7]
2458
2459 # (Possibly) reuse the delta from the revlog if allowed and
2460 # the revlog chunk is a delta.
2461 cachedelta = None
2462 rawtext = None
2463 if populatecachedelta:
2464 dp = self.deltaparent(rev)
2465 if dp != nullrev:
2466 cachedelta = (dp, bytes(self._chunk(rev)))
2467
2468 if not cachedelta:
2469 rawtext = self.revision(rev, raw=True)
2470
2471
2472 if deltareuse == self.DELTAREUSEFULLADD:
2473 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2474 cachedelta=cachedelta,
2475 node=node, flags=flags,
2476 deltacomputer=deltacomputer)
2477 else:
2478 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2479 checkambig=False)
2480 dfh = None
2481 if not destrevlog._inline:
2482 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2483 try:
2484 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2485 p2, flags, cachedelta, ifh, dfh,
2486 deltacomputer=deltacomputer)
2487 finally:
2488 if dfh:
2489 dfh.close()
2490 ifh.close()
2491
2492 if addrevisioncb:
2493 addrevisioncb(self, rev, node)
2494 finally:
2495 destrevlog._lazydeltabase = oldlazydeltabase
2496 destrevlog._deltabothparents = oldamd
@@ -1,940 +1,938 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
9
10 from __future__ import absolute_import
11
12 import collections
13 import heapq
14 import struct
15
16 # import stuff from node for others to import from revlog
17 from ..node import (
18 nullrev,
19 )
20 from ..i18n import _
21
22 from .constants import (
23 REVIDX_ISCENSORED,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 )
26
27 from ..thirdparty import (
28 attr,
29 )
30
31 from .. import (
32 error,
33 mdiff,
34 )
35
- 36 CensoredNodeError = error.CensoredNodeError
- 37
36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
38
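# [editor's note: illustrative arithmetic. With LIMIT_DELTA2TEXT = 2, a
# revision whose full text is 1000 bytes may live on a delta chain carrying
# at most 2000 bytes of accumulated delta data; past that, the delta
# computation prefers storing a fresh full snapshot.]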
41 class _testrevlog(object):
39 class _testrevlog(object):
42 """minimalist fake revlog to use in doctests"""
40 """minimalist fake revlog to use in doctests"""
43
41
44 def __init__(self, data, density=0.5, mingap=0):
42 def __init__(self, data, density=0.5, mingap=0):
45 """data is an list of revision payload boundaries"""
43 """data is an list of revision payload boundaries"""
46 self._data = data
44 self._data = data
47 self._srdensitythreshold = density
45 self._srdensitythreshold = density
48 self._srmingapsize = mingap
46 self._srmingapsize = mingap
49
47
50 def start(self, rev):
48 def start(self, rev):
51 if rev == 0:
49 if rev == 0:
52 return 0
50 return 0
53 return self._data[rev - 1]
51 return self._data[rev - 1]
54
52
55 def end(self, rev):
53 def end(self, rev):
56 return self._data[rev]
54 return self._data[rev]
57
55
58 def length(self, rev):
56 def length(self, rev):
59 return self.end(rev) - self.start(rev)
57 return self.end(rev) - self.start(rev)
60
58
61 def __len__(self):
59 def __len__(self):
62 return len(self._data)
60 return len(self._data)
63
61
64 def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
62 def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
65 """slice revs to reduce the amount of unrelated data to be read from disk.
63 """slice revs to reduce the amount of unrelated data to be read from disk.
66
64
67 ``revs`` is sliced into groups that should be read in one time.
65 ``revs`` is sliced into groups that should be read in one time.
68 Assume that revs are sorted.
66 Assume that revs are sorted.
69
67
70 The initial chunk is sliced until the overall density (payload/chunks-span
68 The initial chunk is sliced until the overall density (payload/chunks-span
71 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
69 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
72 `revlog._srmingapsize` is skipped.
70 `revlog._srmingapsize` is skipped.
73
71
74 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
72 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
75 For consistency with other slicing choice, this limit won't go lower than
73 For consistency with other slicing choice, this limit won't go lower than
76 `revlog._srmingapsize`.
74 `revlog._srmingapsize`.
77
75
78 If individual revisions chunk are larger than this limit, they will still
76 If individual revisions chunk are larger than this limit, they will still
79 be raised individually.
77 be raised individually.
80
78
81 >>> revlog = _testrevlog([
79 >>> revlog = _testrevlog([
82 ... 5, #00 (5)
80 ... 5, #00 (5)
83 ... 10, #01 (5)
81 ... 10, #01 (5)
84 ... 12, #02 (2)
82 ... 12, #02 (2)
85 ... 12, #03 (empty)
83 ... 12, #03 (empty)
86 ... 27, #04 (15)
84 ... 27, #04 (15)
87 ... 31, #05 (4)
85 ... 31, #05 (4)
88 ... 31, #06 (empty)
86 ... 31, #06 (empty)
89 ... 42, #07 (11)
87 ... 42, #07 (11)
90 ... 47, #08 (5)
88 ... 47, #08 (5)
91 ... 47, #09 (empty)
89 ... 47, #09 (empty)
92 ... 48, #10 (1)
90 ... 48, #10 (1)
93 ... 51, #11 (3)
91 ... 51, #11 (3)
94 ... 74, #12 (23)
92 ... 74, #12 (23)
95 ... 85, #13 (11)
93 ... 85, #13 (11)
96 ... 86, #14 (1)
94 ... 86, #14 (1)
97 ... 91, #15 (5)
95 ... 91, #15 (5)
98 ... ])
96 ... ])
99
97
100 >>> list(slicechunk(revlog, list(range(16))))
98 >>> list(slicechunk(revlog, list(range(16))))
101 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
99 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
102 >>> list(slicechunk(revlog, [0, 15]))
100 >>> list(slicechunk(revlog, [0, 15]))
103 [[0], [15]]
101 [[0], [15]]
104 >>> list(slicechunk(revlog, [0, 11, 15]))
102 >>> list(slicechunk(revlog, [0, 11, 15]))
105 [[0], [11], [15]]
103 [[0], [11], [15]]
106 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
104 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
107 [[0], [11, 13, 15]]
105 [[0], [11, 13, 15]]
108 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
106 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
109 [[1, 2], [5, 8, 10, 11], [14]]
107 [[1, 2], [5, 8, 10, 11], [14]]
110
108
111 Slicing with a maximum chunk size
109 Slicing with a maximum chunk size
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
110 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
113 [[0], [11], [13], [15]]
111 [[0], [11], [13], [15]]
114 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
115 [[0], [11], [13, 15]]
113 [[0], [11], [13, 15]]
116 """
114 """
117 if targetsize is not None:
115 if targetsize is not None:
118 targetsize = max(targetsize, revlog._srmingapsize)
116 targetsize = max(targetsize, revlog._srmingapsize)
119 # targetsize should not be specified when evaluating delta candidates:
117 # targetsize should not be specified when evaluating delta candidates:
120 # * targetsize is used to ensure we stay within specification when reading,
118 # * targetsize is used to ensure we stay within specification when reading,
121 # * deltainfo is used to pick are good delta chain when writing.
119 # * deltainfo is used to pick are good delta chain when writing.
122 if not (deltainfo is None or targetsize is None):
120 if not (deltainfo is None or targetsize is None):
123 msg = 'cannot use `targetsize` with a `deltainfo`'
121 msg = 'cannot use `targetsize` with a `deltainfo`'
124 raise error.ProgrammingError(msg)
122 raise error.ProgrammingError(msg)
125 for chunk in _slicechunktodensity(revlog, revs,
123 for chunk in _slicechunktodensity(revlog, revs,
126 deltainfo,
124 deltainfo,
127 revlog._srdensitythreshold,
125 revlog._srdensitythreshold,
128 revlog._srmingapsize):
126 revlog._srmingapsize):
129 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
127 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
130 yield subchunk
128 yield subchunk
131
129
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected,
    but that are still too large compared to the read guarantee of the
    revlog. This might happen when the "minimal gap size" interrupted the
    slicing or when chains are built in a way that creates large blocks next
    to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where the chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    Cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with a too large individual chunk (must return a valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

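# A minimal standalone sketch of the windowing idea above, restated over
# plain (start, end) byte offsets instead of revlog revisions. The name
# `_demo_slicetosize` is hypothetical and not part of this module; it
# assumes `segments` is non-empty and sorted by start offset.
def _demo_slicetosize(segments, targetsize):
    # Greedily extend the current window while its byte span stays within
    # targetsize; otherwise flush the window and start a new one. Like the
    # real function, an oversized single segment is still yielded.
    window = [segments[0]]
    for seg in segments[1:]:
        span = seg[1] - window[0][0]  # bytes read if seg joins the window
        if span <= targetsize:
            window.append(seg)
        else:
            yield window
            window = [seg]
    yield window
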
def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

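# The density criterion above, sketched in isolation. This assumes
# `segments` is a sorted, non-empty list of (start, length) pairs with at
# least one positive length; `_demo_densityslice` is a hypothetical name,
# not part of this module.
def _demo_densityslice(segments, targetdensity):
    import heapq as _heapq
    payload = sum(l for _, l in segments)
    # bytes read if the whole span is fetched in a single read
    readdata = segments[-1][0] + segments[-1][1] - segments[0][0]
    gaps = []
    for (s1, l1), (s2, _) in zip(segments, segments[1:]):
        gap = s2 - (s1 + l1)
        if gap > 0:
            _heapq.heappush(gaps, -gap)  # negated so the largest pops first
    # drop the largest gaps until payload / bytes-read reaches the target
    while gaps and payload < targetdensity * readdata:
        readdata += _heapq.heappop(gaps)  # popped value is negative
    return payload / float(readdata)
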
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.RevlogError(_('node %s is not censored') % expectednode)
-    except CensoredNodeError:
+    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

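# A sketch of the "replace entire base" fast path above. bdiff-style deltas
# are (start, end, newlength) big-endian int32 triplets followed by the new
# data, so a single hunk covering the whole base rewrites it completely and
# the base never needs to be decoded. `_demo_fullreplacement` is a
# hypothetical helper, shown for illustration only.
def _demo_fullreplacement(baselen, newtext):
    header = struct.pack(">lll", 0, baselen, len(newtext))
    return header + newtext  # matches the replacediffheader comparison above
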
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()

def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    if revlog._sparserevlog:
        # As sparse-read will be used, we can consider that the distance,
        # instead of being the span of the whole chunk,
        # is the span of the largest read chunk
        base = deltainfo.base

        if base != nullrev:
            deltachain = revlog._deltachain(base)[0]
        else:
            deltachain = []

        # search for the first non-snapshot revision
        for idx, r in enumerate(deltachain):
            if not revlog.issnapshot(r):
                break
        deltachain = deltachain[idx:]
        chunks = slicechunk(revlog, deltachain, deltainfo)
        all_span = [segmentspan(revlog, revs, deltainfo)
                    for revs in chunks]
        distance = max(all_span)
    else:
        distance = deltainfo.distance

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = distance # ensure the conditional below passes
    maxdist = max(maxdist, defaultmax)
    if revlog._sparserevlog and maxdist < revlog._srmingapsize:
        # In multiple places, we ignore irrelevant data ranges below a
        # certain size. We also apply this tradeoff here and relax the span
        # constraint for small enough content.
        maxdist = revlog._srmingapsize

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    if maxdist < distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of the deltas gets larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of deltas in the chain gets too high.
    if (revlog._maxchainlen
            and revlog._maxchainlen < deltainfo.chainlen):
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exists to prevent endless chains of intermediate deltas from
    # being created.
    if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (deltainfo.snapshotdepth
            and revlog.length(deltainfo.base) < deltainfo.deltalen):
        return False

    return True

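# The acceptance rules above, condensed into a standalone predicate over
# plain integers. This is an editorial restatement with hypothetical
# parameter names (it reuses the module's LIMIT_DELTA2TEXT constant), not an
# API of this module, and it omits the sparse-revlog and snapshot checks.
def _demo_isgooddelta(distance, deltalen, chainlen, payload,
                      textlen, maxdist, maxchainlen):
    if maxdist < distance:                      # reads too wide a span
        return False
    if textlen < deltalen:                      # delta larger than fulltext
        return False
    if textlen * LIMIT_DELTA2TEXT < payload:    # cumulated deltas too costly
        return False
    if maxchainlen and maxchainlen < chainlen:  # chain too long
        return False
    return True
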
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revisions we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta bases that will never produce a good delta
            if deltas_limit < revlog.length(rev):
                continue
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            #      impacting performance. Some bounding or slicing mechanism
            #      would help to reduce this impact.
            good = yield tuple(group)
    yield None

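# In miniature, the send()-based handshake used between finddeltainfo and
# _candidategroups: the consumer pulls a group with next(), then reports its
# pick (or None) back through send(). All names below are illustrative.
def _demo_groups():
    good = yield ('first', 'group')
    if good is None:
        good = yield ('fallback',)
    yield None  # exhausted

# usage sketch:
#   groups = _demo_groups()
#   candidates = next(groups)
#   while candidates is not None:
#       best = pick_best(candidates)       # consumer-side selection
#       candidates = groups.send(best)     # next group, refined, or None
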
def _findsnapshots(revlog, cache, start_rev):
    """find snapshots from start_rev to tip"""
    deltaparent = revlog.deltaparent
    issnapshot = revlog.issnapshot
    for rev in revlog.revs(start_rev):
        if issnapshot(rev):
            cache[deltaparent(rev)].append(rev)

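# Example of the cache shape filled in above (values are hypothetical):
# keys are delta parents, values are the snapshots based on them.
#   cache = collections.defaultdict(list)
#   _findsnapshots(rl, cache, 0)
#   # cache might now be {-1: [0, 7], 0: [3], 7: [9]}: two full snapshots
#   # (delta parent nullrev) and two intermediate ones.
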
def _refinedgroups(revlog, p1, p2, cachedelta):
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling reuse of the source delta is
    # useful when we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None

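# A sketch of the "refine snapshot down" walk above, without the send()
# feedback loop: starting from a chosen snapshot, offer each delta parent in
# turn until the root is reached. `_demo_refinedown` is a hypothetical name;
# the real code lets the caller accept or reject each offered base.
def _demo_refinedown(revlog, good):
    while True:
        base = revlog.deltaparent(good)
        if base == nullrev:
            break
        yield base
        good = base
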
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazily tested bases, if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

        if sparse and parents:
            snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
            # See if we can use an existing snapshot in the parent chains to
            # use as a base for a new intermediate-snapshot
            #
            # search for snapshots in the parents' delta chains
            # map: snapshot-level: snapshot-rev
            parents_snaps = collections.defaultdict(set)
            candidate_chains = [deltachain(p) for p in parents]
            for chain in candidate_chains:
                for idx, s in enumerate(chain):
                    if not revlog.issnapshot(s):
                        break
                    parents_snaps[idx].add(s)
            snapfloor = min(parents_snaps[0]) + 1
            _findsnapshots(revlog, snapshots, snapfloor)
            # search for the highest "unrelated" revision
            #
            # Adding snapshots used by an "unrelated" revision increases the
            # odds that we reuse an independent, yet better, snapshot chain.
            #
            # XXX instead of building a set of revisions, we could lazily
            # enumerate over the chains. That would be more efficient, however
            # we stick to simple code for now.
            all_revs = set()
            for chain in candidate_chains:
                all_revs.update(chain)
            other = None
            for r in revlog.revs(prev, snapfloor):
                if r not in all_revs:
                    other = r
                    break
            if other is not None:
                # To avoid unfair competition, we won't use unrelated
                # intermediate snapshots that are deeper than the ones from
                # the parent delta chain.
                max_depth = max(parents_snaps.keys())
                chain = deltachain(other)
                for idx, s in enumerate(chain):
                    if s < snapfloor:
                        continue
                    if max_depth < idx:
                        break
                    if not revlog.issnapshot(s):
                        break
                    parents_snaps[idx].add(s)
            # Test them as possible intermediate snapshot bases.
            # We test them from highest to lowest level. High level ones are
            # more likely to result in small deltas.
            floor = None
            for idx, snaps in sorted(parents_snaps.items(), reverse=True):
                siblings = set()
                for s in snaps:
                    siblings.update(snapshots[s])
                # Before considering making a new intermediate snapshot, we
                # check if an existing snapshot, a child of the base we
                # consider, would be suitable.
                #
                # It gives a chance to reuse a delta chain "unrelated" to the
                # current revision instead of starting our own. Without such
                # re-use, topological branches would keep reopening new
                # chains, creating more and more snapshots as the repository
                # grows.

                if floor is not None:
                    # We only do this for siblings created after the one in
                    # our parent's delta chain. Those created before have
                    # fewer chances to be valid bases since our ancestors had
                    # to create a new snapshot.
                    siblings = [r for r in siblings if floor < r]
                yield tuple(sorted(siblings))
                # then test the bases from our parents' delta chains.
                yield tuple(sorted(snaps))
                floor = min(snaps)
            # No suitable base found in the parent chain, search if any full
            # snapshots emitted since the parents' bases would be a suitable
            # base for an intermediate snapshot.
            #
            # It gives a chance to reuse a delta chain unrelated to the
            # current revisions instead of starting our own. Without such
            # re-use, topological branches would keep reopening new full
            # chains, creating more and more snapshots as the repository
            # grows.
            yield tuple(snapshots[nullrev])

    if not sparse:
        # other approaches failed; try against prev to hopefully save us a
        # fulltext.
        yield (prev,)

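# Summary of the emission order above for a general-delta, sparse revlog
# (illustrative, not exhaustive):
#   1. the closest parent, then the other parent (or both at once when
#      delta-both-parents is enabled),
#   2. snapshot siblings and bases from the parents' chains, highest
#      snapshot level first,
#   3. full snapshots usable as intermediate-snapshot bases,
# and, for non-sparse revlogs, `prev` as a last resort.
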
class deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revisions (see "candelta" for why)
        # not calling candelta since only one revision needs testing, also
        # to avoid the overhead of fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
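
# Usage sketch for the class above. `rl` and `revinfo` are assumed to come
# from the surrounding storage code, and `_datafp` is used here only as an
# assumed way to obtain a data file handle; this is illustrative, not an
# API guarantee.
#
#   dc = deltacomputer(rl)
#   with rl._datafp() as fh:
#       info = dc.finddeltainfo(revinfo, fh)
#   # info.base, info.deltalen and info.snapshotdepth describe the choice;
#   # delta info for a full snapshot is returned when no candidate passed
#   # isgooddeltainfo.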