revlogdeltas: always return a delta info object in finddeltainfo...
Boris Feld - r39369:5d343a24 default
@@ -1,2483 +1,2475 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

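# Illustrative sketch (not part of the original file): how the metadata
# envelope round-trips through packmeta()/parsemeta(). The keys and text
# below are made-up sample data.
#
#     blob = packmeta({'copy': 'foo', 'copyrev': 'abc123'}, 'file content')
#     # blob == "\1\ncopy: foo\ncopyrev: abc123\n\1\nfile content"
#     meta, size = parsemeta(blob)
#     # meta == {'copy': 'foo', 'copyrev': 'abc123'}; blob[size:] is the text
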
def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw) f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

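# Illustrative sketch (not part of the original file): registering a
# processor. REVIDX_MYFLAG, encode() and decode() are hypothetical; per the
# invariants above, the flag would first need to be added to
# REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER (and to
# REVIDX_RAWTEXT_CHANGING_FLAGS if it alters rawtext).
#
#     def _myread(self, rawtext):
#         return decode(rawtext), True    # decoded text matches the hash
#     def _mywrite(self, text):
#         return encode(text), False      # hash "text", not "rawtext"
#     def _myraw(self, rawtext):
#         return False                    # same boolean as "write"
#     addflagprocessor(REVIDX_MYFLAG, (_myread, _mywrite, _myraw))
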
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

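# Illustrative sketch (not part of the original file): the first index field
# packs the data offset into the high bits and the 16-bit flag field into
# the low bits, so the two accessors above invert offset_type():
#
#     q = offset_type(1048576, REVIDX_ISCENSORED)
#     getoffset(q) == 1048576
#     gettype(q) == REVIDX_ISCENSORED
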
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

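# Illustrative sketch (not part of the original file): because the two
# parents are ordered before hashing, the node id is
# sha1(min(p1, p2) + max(p1, p2) + text), so parent order cannot matter:
#
#     hash(text, p1, p2) == hash(text, p2, p1)
#
# A root revision (p1 == p2 == nullid) hashes as
# sha1(nullid + nullid + text), seeded from the precomputed _nullhash.
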
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True, frozen=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    linknode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

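# Illustrative sketch (not part of the original file): a v0 entry is four
# signed 32-bit big-endian integers followed by three 20-byte nodeids,
# 76 bytes in total. 'p1node', 'p2node' and 'node' stand for any 20-byte
# binary nodeids:
#
#     indexformatv0.size == 76
#     entry = indexformatv0_pack(0, 11, 0, 0, p1node, p2node, node)
#     indexformatv0_unpack(entry) == (0, 11, 0, 0, p1node, p2node, node)
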
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

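# Illustrative sketch (not part of the original file): an ng entry is 64
# bytes (">Qiiiiii20s12x": one 64-bit field holding offset << 16 | flags,
# six 32-bit integers, a 20-byte nodeid, 12 bytes of padding). For rev 0,
# revlogio.packentry() below overwrites the first 4 bytes with the revlog
# version header, which is why the first revision's offset must be 0:
#
#     indexformatng.size == 64
#     entry = indexformatng_pack(offset_type(0, 0), clen, ulen, base,
#                                link, p1rev, p2rev, node)
#     entry = versionformat_pack(version) + entry[4:]   # rev == 0 only
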
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
541 """iterate over all rev in this revlog (from start to stop)"""
541 """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

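    # Illustrative sketch (not part of the original file): revs() iterates
    # forward by default and backward when start > stop; when a 'stop' is
    # given it is included in the result because of the 'stop += step'
    # shift above. For a revlog 'rl' with at least six revisions:
    #
    #     list(rl.revs(2, 5)) == [2, 3, 4, 5]
    #     list(rl.revs(5, 2)) == [5, 4, 3, 2]
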
    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

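    # Illustrative sketch (not part of the original file): with generaldelta,
    # _deltachain() follows each entry's base pointer back to a full
    # snapshot. If rev 7 deltas against rev 5, rev 5 against rev 2, and
    # rev 2 stores a full text, then for a revlog 'rl':
    #
    #     rl._deltachain(7) == ([2, 5, 7], False)
    #     rl._deltachain(7, stoprev=5) == ([7], True)
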
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
899 if common is None:
899 if common is None:
900 common = [nullrev]
900 common = [nullrev]
901 if heads is None:
901 if heads is None:
902 heads = self.headrevs()
902 heads = self.headrevs()
903
903
904 inc = self.incrementalmissingrevs(common=common)
904 inc = self.incrementalmissingrevs(common=common)
905 return inc.missingancestors(heads)
905 return inc.missingancestors(heads)
906
906
907 def findmissing(self, common=None, heads=None):
907 def findmissing(self, common=None, heads=None):
908 """Return the ancestors of heads that are not ancestors of common.
908 """Return the ancestors of heads that are not ancestors of common.
909
909
910 More specifically, return a list of nodes N such that every N
910 More specifically, return a list of nodes N such that every N
911 satisfies the following constraints:
911 satisfies the following constraints:
912
912
913 1. N is an ancestor of some node in 'heads'
913 1. N is an ancestor of some node in 'heads'
914 2. N is not an ancestor of any node in 'common'
914 2. N is not an ancestor of any node in 'common'
915
915
916 The list is sorted by revision number, meaning it is
916 The list is sorted by revision number, meaning it is
917 topologically sorted.
917 topologically sorted.
918
918
919 'heads' and 'common' are both lists of node IDs. If heads is
919 'heads' and 'common' are both lists of node IDs. If heads is
920 not supplied, uses all of the revlog's heads. If common is not
920 not supplied, uses all of the revlog's heads. If common is not
921 supplied, uses nullid."""
921 supplied, uses nullid."""
922 if common is None:
922 if common is None:
923 common = [nullid]
923 common = [nullid]
924 if heads is None:
924 if heads is None:
925 heads = self.heads()
925 heads = self.heads()
926
926
927 common = [self.rev(n) for n in common]
927 common = [self.rev(n) for n in common]
928 heads = [self.rev(n) for n in heads]
928 heads = [self.rev(n) for n in heads]
929
929
930 inc = self.incrementalmissingrevs(common=common)
930 inc = self.incrementalmissingrevs(common=common)
931 return [self.node(r) for r in inc.missingancestors(heads)]
931 return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
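    # Illustrative example for nodesbetween() above, on a hypothetical
    # linear history a -> b -> c:
    #
    #   nodes, outroots, outheads = rlog.nodesbetween(roots=[a], heads=[c])
    #   # nodes == [a, b, c], outroots == [a], outheads == [c]
    #
    # Both endpoints appear in 'nodes' because every node counts as its own
    # ancestor and descendant.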

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
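    # Worked example for _headrevs() above (illustrative): with revisions
    # 0 <- 1 <- 2 and 0 <- 3 (rev 3 branching off rev 0), each rev first
    # marks itself in 'ishead' and then clears the slots of its parents;
    # the extra trailing slot harmlessly absorbs nullrev (-1) parents.
    # Revs 2 and 3 are the only ones never cleared, so the result is [2, 3].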

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
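    # Hedged usage sketch for heads() above ('rlog', 'n', and 'm' are
    # hypothetical):
    #
    #   rlog.heads()          # all childless nodes in the revlog
    #   rlog.heads(start=n)   # only heads that descend from node n
    #   rlog.heads(stop=[m])  # treat revs from m onward as childless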

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
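    # Why the commonancestorsheads() trick in isancestorrev() above works
    # (illustrative): if a is an ancestor of b, every common ancestor of
    # (a, b) is an ancestor of a, so a itself is the single head of that
    # set and 'a in self._commonancestorsheads(a, b)' is True; otherwise a
    # cannot appear among those heads. The 'a > b' early return is valid
    # because an ancestor always has a lower revision number than its
    # descendants.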

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass
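    # Hedged sketch of the identifier forms _match() above resolves
    # ('rlog', 'node20', and 'hex40' are hypothetical):
    #
    #   rlog._match(5)        # integer revision number -> node
    #   rlog._match(b'5')     # str(rev); negative values count from the end
    #   rlog._match(node20)   # 20-byte binary nodeid, looked up directly
    #   rlog._match(hex40)    # 40-char hex nodeid, converted via bin()
    #
    # Anything else falls through and the method returns None.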

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
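    # Hedged usage sketch for shortest() above:
    #
    #   rlog.shortest(node)      # e.g. b'1f2a' (hypothetical result)
    #   rlog.shortest(node, 6)   # prefix of at least 6 hex digits
    #
    # The returned prefix is unambiguous both against other stored nodes
    # and against the synthetic wdir id (all 'f's), which is what the
    # disambiguate() pass guarantees.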

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
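    # Worked example of the window arithmetic in _readsegment() above,
    # assuming a chunk cache size of 65536 bytes: a request for
    # offset=70000, length=100 rounds down to realoffset = 70000 & ~65535
    # = 65536, and reallength = ((70000 + 100 + 65536) & ~65535) - 65536
    # = 65536, so one full aligned window is read and cached, and a
    # 100-byte buffer starting at offset - realoffset = 4464 within it is
    # returned.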

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
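    # Illustrative note on the inline adjustment in _getsegmentforrevs()
    # above: in an inline revlog the data of revision r is physically
    # preceded by r + 1 index entries (for revisions 0..r), each
    # self._io.size bytes long, which is exactly what the
    # 'start += (startrev + 1) * self._io.size' correction accounts for.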

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)
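    # Hedged example for issnapshot() above, on a hypothetical delta
    # chain. A revision is a snapshot when its delta chain never passes
    # through one of its parents:
    #
    #   rev 0: full text (delta parent nullrev)       -> snapshot
    #   rev 5: delta vs rev 0, parents (4, -1)        -> snapshot
    #          (an intermediate snapshot: its base 0 is not a parent)
    #   rev 6: delta vs rev 5, parents (5, -1)        -> not a snapshot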

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))
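    # Hedged usage sketch for revdiff() above:
    #
    #   delta = rlog.revdiff(r1, r2)
    #
    # When r2 is already stored as a delta against r1, the stored chunk is
    # returned as-is with no recomputation; otherwise a binary diff of the
    # two raw texts is computed with mdiff.textdiff().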

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
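    # Hedged sketch of the processor shape _processflags() above expects:
    # each entry in _flagprocessors is a (readtransform, writetransform,
    # rawtransform) triple. The names below are hypothetical:
    #
    #   def _readx(rl, text):
    #       return decode(text), True   # (new text, validatehash)
    #   def _writex(rl, text):
    #       return encode(text), True
    #   def _rawx(rl, text):
    #       return False                # rawtext hash should not be checked
    #
    #   _flagprocessors[REVIDX_EXTSTORED] = (_readx, _writex, _rawx)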

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise RevlogError(_("integrity check failed on %s:%s")
                                  % (self.indexfile, pycompat.bytestr(revornode)))
        except RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
1826
1826
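A hedged sketch of the call shape for addrevision(); a live transaction and revlog are required, so the lines stay commented out:

    # node = rl.addrevision(text, tr, linkrev, p1node, p2node)
    # several adds can share one deltacomputer to amortize its state:
    # dc = deltautil.deltacomputer(rl)
    # rl.addrevision(text2, tr, linkrev, node, nullid, deltacomputer=dc)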
1827 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1827 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1828 cachedelta=None, deltacomputer=None):
1828 cachedelta=None, deltacomputer=None):
1829 """add a raw revision with known flags, node and parents
1829 """add a raw revision with known flags, node and parents
1830 useful when reusing a revision not stored in this revlog (e.g. received
1830 useful when reusing a revision not stored in this revlog (e.g. received
1831 over wire, or read from an external bundle).
1831 over wire, or read from an external bundle).
1832 """
1832 """
1833 dfh = None
1833 dfh = None
1834 if not self._inline:
1834 if not self._inline:
1835 dfh = self._datafp("a+")
1835 dfh = self._datafp("a+")
1836 ifh = self._indexfp("a+")
1836 ifh = self._indexfp("a+")
1837 try:
1837 try:
1838 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1838 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1839 flags, cachedelta, ifh, dfh,
1839 flags, cachedelta, ifh, dfh,
1840 deltacomputer=deltacomputer)
1840 deltacomputer=deltacomputer)
1841 finally:
1841 finally:
1842 if dfh:
1842 if dfh:
1843 dfh.close()
1843 dfh.close()
1844 ifh.close()
1844 ifh.close()
1845
1845
1846 def compress(self, data):
1846 def compress(self, data):
1847 """Generate a possibly-compressed representation of data."""
1847 """Generate a possibly-compressed representation of data."""
1848 if not data:
1848 if not data:
1849 return '', data
1849 return '', data
1850
1850
1851 compressed = self._compressor.compress(data)
1851 compressed = self._compressor.compress(data)
1852
1852
1853 if compressed:
1853 if compressed:
1854 # The revlog compressor added the header in the returned data.
1854 # The revlog compressor added the header in the returned data.
1855 return '', compressed
1855 return '', compressed
1856
1856
1857 if data[0:1] == '\0':
1857 if data[0:1] == '\0':
1858 return '', data
1858 return '', data
1859 return 'u', data
1859 return 'u', data
1860
1860
1861 def decompress(self, data):
1861 def decompress(self, data):
1862 """Decompress a revlog chunk.
1862 """Decompress a revlog chunk.
1863
1863
1864 The chunk is expected to begin with a header identifying the
1864 The chunk is expected to begin with a header identifying the
1865 format type so it can be routed to an appropriate decompressor.
1865 format type so it can be routed to an appropriate decompressor.
1866 """
1866 """
1867 if not data:
1867 if not data:
1868 return data
1868 return data
1869
1869
1870 # Revlogs are read much more frequently than they are written and many
1870 # Revlogs are read much more frequently than they are written and many
1871 # chunks only take microseconds to decompress, so performance is
1871 # chunks only take microseconds to decompress, so performance is
1872 # important here.
1872 # important here.
1873 #
1873 #
1874 # We can make a few assumptions about revlogs:
1874 # We can make a few assumptions about revlogs:
1875 #
1875 #
1876 # 1) the majority of chunks will be compressed (as opposed to inline
1876 # 1) the majority of chunks will be compressed (as opposed to inline
1877 # raw data).
1877 # raw data).
1878 # 2) decompressing *any* data will likely be at least 10x slower than
1878 # 2) decompressing *any* data will likely be at least 10x slower than
1879 # returning raw inline data.
1879 # returning raw inline data.
1880 # 3) we want to prioritize common and officially supported compression
1880 # 3) we want to prioritize common and officially supported compression
1881 # engines
1881 # engines
1882 #
1882 #
1883 # It follows that we want to optimize the "decompress compressed data
1883 # It follows that we want to optimize the "decompress compressed data
1884 # when encoded with common and officially supported compression engines"
1884 # when encoded with common and officially supported compression engines"
1885 # case over "raw data" and "data encoded by less common or non-official
1885 # case over "raw data" and "data encoded by less common or non-official
1886 # compression engines." That is why we have the inline lookup first
1886 # compression engines." That is why we have the inline lookup first
1887 # followed by the compengines lookup.
1887 # followed by the compengines lookup.
1888 #
1888 #
1889 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1889 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1890 # compressed chunks. And this matters for changelog and manifest reads.
1890 # compressed chunks. And this matters for changelog and manifest reads.
1891 t = data[0:1]
1891 t = data[0:1]
1892
1892
1893 if t == 'x':
1893 if t == 'x':
1894 try:
1894 try:
1895 return _zlibdecompress(data)
1895 return _zlibdecompress(data)
1896 except zlib.error as e:
1896 except zlib.error as e:
1897 raise RevlogError(_('revlog decompress error: %s') %
1897 raise RevlogError(_('revlog decompress error: %s') %
1898 stringutil.forcebytestr(e))
1898 stringutil.forcebytestr(e))
1899 # '\0' is more common than 'u' so it goes first.
1899 # '\0' is more common than 'u' so it goes first.
1900 elif t == '\0':
1900 elif t == '\0':
1901 return data
1901 return data
1902 elif t == 'u':
1902 elif t == 'u':
1903 return util.buffer(data, 1)
1903 return util.buffer(data, 1)
1904
1904
1905 try:
1905 try:
1906 compressor = self._decompressors[t]
1906 compressor = self._decompressors[t]
1907 except KeyError:
1907 except KeyError:
1908 try:
1908 try:
1909 engine = util.compengines.forrevlogheader(t)
1909 engine = util.compengines.forrevlogheader(t)
1910 compressor = engine.revlogcompressor()
1910 compressor = engine.revlogcompressor()
1911 self._decompressors[t] = compressor
1911 self._decompressors[t] = compressor
1912 except KeyError:
1912 except KeyError:
1913 raise RevlogError(_('unknown compression type %r') % t)
1913 raise RevlogError(_('unknown compression type %r') % t)
1914
1914
1915 return compressor.decompress(data)
1915 return compressor.decompress(data)
1916
1916
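As a reading aid, here is a minimal self-contained sketch of the one-byte header convention that compress() and decompress() above rely on. It covers only the zlib ('x'), raw ('\0') and explicit-uncompressed ('u') cases, and the length comparison stands in for the real engine's own gain check:

    import zlib

    def toy_compress(data):
        if not data:
            return b'', data
        compressed = zlib.compress(data)
        if len(compressed) < len(data):
            return b'', compressed   # zlib output begins with 'x'
        if data[0:1] == b'\0':
            return b'', data         # a leading '\0' already marks raw data
        return b'u', data            # prepend an explicit "uncompressed" marker

    def toy_decompress(data):
        if not data:
            return data
        t = data[0:1]
        if t == b'x':
            return zlib.decompress(data)
        elif t == b'\0':
            return data
        elif t == b'u':
            return data[1:]
        raise ValueError('unknown compression type %r' % t)

    assert toy_decompress(b''.join(toy_compress(b'abc'))) == b'abc'
    assert toy_decompress(b''.join(toy_compress(b'xyz' * 100))) == b'xyz' * 100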
1917 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1917 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1918 cachedelta, ifh, dfh, alwayscache=False,
1918 cachedelta, ifh, dfh, alwayscache=False,
1919 deltacomputer=None):
1919 deltacomputer=None):
1920 """internal function to add revisions to the log
1920 """internal function to add revisions to the log
1921
1921
1922 see addrevision for argument descriptions.
1922 see addrevision for argument descriptions.
1923
1923
1924 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1924 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1925
1925
1926 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1926 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1927 be used.
1927 be used.
1928
1928
1929 invariants:
1929 invariants:
1930 - rawtext is optional (can be None); if not set, cachedelta must be set.
1930 - rawtext is optional (can be None); if not set, cachedelta must be set.
1931 if both are set, they must correspond to each other.
1931 if both are set, they must correspond to each other.
1932 """
1932 """
1933 if node == nullid:
1933 if node == nullid:
1934 raise RevlogError(_("%s: attempt to add null revision") %
1934 raise RevlogError(_("%s: attempt to add null revision") %
1935 (self.indexfile))
1935 (self.indexfile))
1936 if node == wdirid or node in wdirfilenodeids:
1936 if node == wdirid or node in wdirfilenodeids:
1937 raise RevlogError(_("%s: attempt to add wdir revision") %
1937 raise RevlogError(_("%s: attempt to add wdir revision") %
1938 (self.indexfile))
1938 (self.indexfile))
1939
1939
1940 if self._inline:
1940 if self._inline:
1941 fh = ifh
1941 fh = ifh
1942 else:
1942 else:
1943 fh = dfh
1943 fh = dfh
1944
1944
1945 btext = [rawtext]
1945 btext = [rawtext]
1946
1946
1947 curr = len(self)
1947 curr = len(self)
1948 prev = curr - 1
1948 prev = curr - 1
1949 offset = self.end(prev)
1949 offset = self.end(prev)
1950 p1r, p2r = self.rev(p1), self.rev(p2)
1950 p1r, p2r = self.rev(p1), self.rev(p2)
1951
1951
1952 # full versions are inserted when the needed deltas
1952 # full versions are inserted when the needed deltas
1953 # become comparable to the uncompressed text
1953 # become comparable to the uncompressed text
1954 if rawtext is None:
1954 if rawtext is None:
1955 # need the rawtext size before it is changed by flag processors, which is
1955 # need the rawtext size before it is changed by flag processors, which is
1956 # the non-raw size. use revlog explicitly to avoid filelog's extra
1956 # the non-raw size. use revlog explicitly to avoid filelog's extra
1957 # logic that might remove metadata size.
1957 # logic that might remove metadata size.
1958 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1958 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1959 cachedelta[1])
1959 cachedelta[1])
1960 else:
1960 else:
1961 textlen = len(rawtext)
1961 textlen = len(rawtext)
1962
1962
1963 if deltacomputer is None:
1963 if deltacomputer is None:
1964 deltacomputer = deltautil.deltacomputer(self)
1964 deltacomputer = deltautil.deltacomputer(self)
1965
1965
1966 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1966 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1967
1967
1968 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1968 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1969
1969
1970 if deltainfo is not None:
1971 base = deltainfo.base
1972 chainbase = deltainfo.chainbase
1973 data = deltainfo.data
1974 l = deltainfo.deltalen
1975 else:
1976 rawtext = deltacomputer.buildtext(revinfo, fh)
1977 data = self.compress(rawtext)
1978 l = len(data[1]) + len(data[0])
1979 base = chainbase = curr
1980
1981 e = (offset_type(offset, flags), l, textlen,
1982 base, link, p1r, p2r, node)
1983 self.index.append(e)
1984 self.nodemap[node] = curr
1985
1986 entry = self._io.packentry(e, self.node, self.version, curr)
1987 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1988
1989 if alwayscache and rawtext is None:
1990 rawtext = deltacomputer.buildtext(revinfo, fh)
1991
1992 if type(rawtext) == bytes: # only accept immutable objects
1993 self._cache = (node, curr, rawtext)
1994 self._chainbasecache[curr] = chainbase
1995 return node
1970 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1971 deltainfo.base, link, p1r, p2r, node)
1972 self.index.append(e)
1973 self.nodemap[node] = curr
1974
1975 entry = self._io.packentry(e, self.node, self.version, curr)
1976 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1977 link, offset)
1978
1979 rawtext = btext[0]
1980
1981 if alwayscache and rawtext is None:
1982 rawtext = deltacomputer.buildtext(revinfo, fh)
1983
1984 if type(rawtext) == bytes: # only accept immutable objects
1985 self._cache = (node, curr, rawtext)
1986 self._chainbasecache[curr] = deltainfo.chainbase
1987 return node
1996
1988
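Aside: the first field of the index tuple built in this hunk packs the data-file offset together with the revision flags; offset_type(), defined earlier in revlog.py, is essentially the following restatement:

    def offset_type_sketch(offset, flags):
        # high bits carry the byte offset, the low 16 bits carry the flags
        return int(int(offset) << 16 | flags)

    assert offset_type_sketch(0, 0) == 0
    assert offset_type_sketch(1, 0) == 65536
    assert offset_type_sketch(0, 3) & 0xffff == 3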
1997 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1989 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1998 # Files opened in a+ mode have inconsistent behavior on various
1990 # Files opened in a+ mode have inconsistent behavior on various
1999 # platforms. Windows requires that a file positioning call be made
1991 # platforms. Windows requires that a file positioning call be made
2000 # when the file handle transitions between reads and writes. See
1992 # when the file handle transitions between reads and writes. See
2001 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1993 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2002 # platforms, Python or the platform itself can be buggy. Some versions
1994 # platforms, Python or the platform itself can be buggy. Some versions
2003 # of Solaris have been observed to not append at the end of the file
1995 # of Solaris have been observed to not append at the end of the file
2004 # if the file was seeked to before the end. See issue4943 for more.
1996 # if the file was seeked to before the end. See issue4943 for more.
2005 #
1997 #
2006 # We work around this issue by inserting a seek() before writing.
1998 # We work around this issue by inserting a seek() before writing.
2007 # Note: This is likely not necessary on Python 3.
1999 # Note: This is likely not necessary on Python 3.
2008 ifh.seek(0, os.SEEK_END)
2000 ifh.seek(0, os.SEEK_END)
2009 if dfh:
2001 if dfh:
2010 dfh.seek(0, os.SEEK_END)
2002 dfh.seek(0, os.SEEK_END)
2011
2003
2012 curr = len(self) - 1
2004 curr = len(self) - 1
2013 if not self._inline:
2005 if not self._inline:
2014 transaction.add(self.datafile, offset)
2006 transaction.add(self.datafile, offset)
2015 transaction.add(self.indexfile, curr * len(entry))
2007 transaction.add(self.indexfile, curr * len(entry))
2016 if data[0]:
2008 if data[0]:
2017 dfh.write(data[0])
2009 dfh.write(data[0])
2018 dfh.write(data[1])
2010 dfh.write(data[1])
2019 ifh.write(entry)
2011 ifh.write(entry)
2020 else:
2012 else:
2021 offset += curr * self._io.size
2013 offset += curr * self._io.size
2022 transaction.add(self.indexfile, offset, curr)
2014 transaction.add(self.indexfile, offset, curr)
2023 ifh.write(entry)
2015 ifh.write(entry)
2024 ifh.write(data[0])
2016 ifh.write(data[0])
2025 ifh.write(data[1])
2017 ifh.write(data[1])
2026 self._enforceinlinesize(transaction, ifh)
2018 self._enforceinlinesize(transaction, ifh)
2027
2019
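The a+ caveat documented at the top of _writeentry() boils down to one defensive line; a minimal sketch of the pattern, using a throwaway temp file and 'ab' standing in for the real 'a+' handles:

    import os
    import tempfile

    fd, path = tempfile.mkstemp()
    os.close(fd)
    fh = open(path, 'ab')
    fh.seek(0, os.SEEK_END)  # position explicitly before writing; see issue4943
    fh.write(b'index entry bytes')
    fh.close()
    os.unlink(path)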
2028 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2020 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2029 """
2021 """
2030 add a delta group
2022 add a delta group
2031
2023
2032 given a set of deltas, add them to the revision log. the
2024 given a set of deltas, add them to the revision log. the
2033 first delta is against its parent, which should be in our
2025 first delta is against its parent, which should be in our
2034 log, the rest are against the previous delta.
2026 log, the rest are against the previous delta.
2035
2027
2036 If ``addrevisioncb`` is defined, it will be called with arguments of
2028 If ``addrevisioncb`` is defined, it will be called with arguments of
2037 this revlog and the node that was added.
2029 this revlog and the node that was added.
2038 """
2030 """
2039
2031
2040 nodes = []
2032 nodes = []
2041
2033
2042 r = len(self)
2034 r = len(self)
2043 end = 0
2035 end = 0
2044 if r:
2036 if r:
2045 end = self.end(r - 1)
2037 end = self.end(r - 1)
2046 ifh = self._indexfp("a+")
2038 ifh = self._indexfp("a+")
2047 isize = r * self._io.size
2039 isize = r * self._io.size
2048 if self._inline:
2040 if self._inline:
2049 transaction.add(self.indexfile, end + isize, r)
2041 transaction.add(self.indexfile, end + isize, r)
2050 dfh = None
2042 dfh = None
2051 else:
2043 else:
2052 transaction.add(self.indexfile, isize, r)
2044 transaction.add(self.indexfile, isize, r)
2053 transaction.add(self.datafile, end)
2045 transaction.add(self.datafile, end)
2054 dfh = self._datafp("a+")
2046 dfh = self._datafp("a+")
2055 def flush():
2047 def flush():
2056 if dfh:
2048 if dfh:
2057 dfh.flush()
2049 dfh.flush()
2058 ifh.flush()
2050 ifh.flush()
2059 try:
2051 try:
2060 deltacomputer = deltautil.deltacomputer(self)
2052 deltacomputer = deltautil.deltacomputer(self)
2061 # loop through our set of deltas
2053 # loop through our set of deltas
2062 for data in deltas:
2054 for data in deltas:
2063 node, p1, p2, linknode, deltabase, delta, flags = data
2055 node, p1, p2, linknode, deltabase, delta, flags = data
2064 link = linkmapper(linknode)
2056 link = linkmapper(linknode)
2065 flags = flags or REVIDX_DEFAULT_FLAGS
2057 flags = flags or REVIDX_DEFAULT_FLAGS
2066
2058
2067 nodes.append(node)
2059 nodes.append(node)
2068
2060
2069 if node in self.nodemap:
2061 if node in self.nodemap:
2070 # this can happen if two branches make the same change
2062 # this can happen if two branches make the same change
2071 continue
2063 continue
2072
2064
2073 for p in (p1, p2):
2065 for p in (p1, p2):
2074 if p not in self.nodemap:
2066 if p not in self.nodemap:
2075 raise LookupError(p, self.indexfile,
2067 raise LookupError(p, self.indexfile,
2076 _('unknown parent'))
2068 _('unknown parent'))
2077
2069
2078 if deltabase not in self.nodemap:
2070 if deltabase not in self.nodemap:
2079 raise LookupError(deltabase, self.indexfile,
2071 raise LookupError(deltabase, self.indexfile,
2080 _('unknown delta base'))
2072 _('unknown delta base'))
2081
2073
2082 baserev = self.rev(deltabase)
2074 baserev = self.rev(deltabase)
2083
2075
2084 if baserev != nullrev and self.iscensored(baserev):
2076 if baserev != nullrev and self.iscensored(baserev):
2085 # if base is censored, delta must be full replacement in a
2077 # if base is censored, delta must be full replacement in a
2086 # single patch operation
2078 # single patch operation
2087 hlen = struct.calcsize(">lll")
2079 hlen = struct.calcsize(">lll")
2088 oldlen = self.rawsize(baserev)
2080 oldlen = self.rawsize(baserev)
2089 newlen = len(delta) - hlen
2081 newlen = len(delta) - hlen
2090 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2082 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2091 raise error.CensoredBaseError(self.indexfile,
2083 raise error.CensoredBaseError(self.indexfile,
2092 self.node(baserev))
2084 self.node(baserev))
2093
2085
2094 if not flags and self._peek_iscensored(baserev, delta, flush):
2086 if not flags and self._peek_iscensored(baserev, delta, flush):
2095 flags |= REVIDX_ISCENSORED
2087 flags |= REVIDX_ISCENSORED
2096
2088
2097 # We assume consumers of addrevisioncb will want to retrieve
2089 # We assume consumers of addrevisioncb will want to retrieve
2098 # the added revision, which will require a call to
2090 # the added revision, which will require a call to
2099 # revision(). revision() will fast path if there is a cache
2091 # revision(). revision() will fast path if there is a cache
2100 # hit. So, we tell _addrevision() to always cache in this case.
2092 # hit. So, we tell _addrevision() to always cache in this case.
2101 # We're only using addgroup() in the context of changegroup
2093 # We're only using addgroup() in the context of changegroup
2102 # generation so the revision data can always be handled as raw
2094 # generation so the revision data can always be handled as raw
2103 # by the flagprocessor.
2095 # by the flagprocessor.
2104 self._addrevision(node, None, transaction, link,
2096 self._addrevision(node, None, transaction, link,
2105 p1, p2, flags, (baserev, delta),
2097 p1, p2, flags, (baserev, delta),
2106 ifh, dfh,
2098 ifh, dfh,
2107 alwayscache=bool(addrevisioncb),
2099 alwayscache=bool(addrevisioncb),
2108 deltacomputer=deltacomputer)
2100 deltacomputer=deltacomputer)
2109
2101
2110 if addrevisioncb:
2102 if addrevisioncb:
2111 addrevisioncb(self, node)
2103 addrevisioncb(self, node)
2112
2104
2113 if not dfh and not self._inline:
2105 if not dfh and not self._inline:
2114 # addrevision switched from inline to conventional
2106 # addrevision switched from inline to conventional
2115 # reopen the index
2107 # reopen the index
2116 ifh.close()
2108 ifh.close()
2117 dfh = self._datafp("a+")
2109 dfh = self._datafp("a+")
2118 ifh = self._indexfp("a+")
2110 ifh = self._indexfp("a+")
2119 finally:
2111 finally:
2120 if dfh:
2112 if dfh:
2121 dfh.close()
2113 dfh.close()
2122 ifh.close()
2114 ifh.close()
2123
2115
2124 return nodes
2116 return nodes
2125
2117
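For reference, each element of ``deltas`` unpacked at the top of the loop above is a 7-tuple. The placeholder values below only illustrate the shape; real entries carry 20-byte nodes and an mdiff binary patch:

    node = p1 = p2 = linknode = deltabase = b'\x00' * 20
    delta = b''   # binary patch against deltabase's raw text
    flags = 0     # falsy, so addgroup() substitutes REVIDX_DEFAULT_FLAGS
    entry = (node, p1, p2, linknode, deltabase, delta, flags)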
2126 def iscensored(self, rev):
2118 def iscensored(self, rev):
2127 """Check if a file revision is censored."""
2119 """Check if a file revision is censored."""
2128 if not self._censorable:
2120 if not self._censorable:
2129 return False
2121 return False
2130
2122
2131 return self.flags(rev) & REVIDX_ISCENSORED
2123 return self.flags(rev) & REVIDX_ISCENSORED
2132
2124
2133 def _peek_iscensored(self, baserev, delta, flush):
2125 def _peek_iscensored(self, baserev, delta, flush):
2134 """Quickly check if a delta produces a censored revision."""
2126 """Quickly check if a delta produces a censored revision."""
2135 if not self._censorable:
2127 if not self._censorable:
2136 return False
2128 return False
2137
2129
2138 # Fragile heuristic: unless new file meta keys are added alphabetically
2130 # Fragile heuristic: unless new file meta keys are added alphabetically
2139 # preceding "censored", all censored revisions are prefixed by
2131 # preceding "censored", all censored revisions are prefixed by
2140 # "\1\ncensored:". A delta producing such a censored revision must be a
2132 # "\1\ncensored:". A delta producing such a censored revision must be a
2141 # full-replacement delta, so we inspect the first and only patch in the
2133 # full-replacement delta, so we inspect the first and only patch in the
2142 # delta for this prefix.
2134 # delta for this prefix.
2143 hlen = struct.calcsize(">lll")
2135 hlen = struct.calcsize(">lll")
2144 if len(delta) <= hlen:
2136 if len(delta) <= hlen:
2145 return False
2137 return False
2146
2138
2147 oldlen = self.rawsize(baserev)
2139 oldlen = self.rawsize(baserev)
2148 newlen = len(delta) - hlen
2140 newlen = len(delta) - hlen
2149 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2141 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2150 return False
2142 return False
2151
2143
2152 add = "\1\ncensored:"
2144 add = "\1\ncensored:"
2153 addlen = len(add)
2145 addlen = len(add)
2154 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2146 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2155
2147
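To make the heuristic above concrete, here is a hedged construction of the kind of full-replacement delta it matches. The tombstone text is made up, and the header mirrors what mdiff.replacediffheader(oldlen, newlen) is expected to produce (start=0, end=oldlen, newlen):

    import struct

    oldlen = 120
    tombstone = b'\x01\ncensored:made-up reason\x01\n'
    newlen = len(tombstone)
    delta = struct.pack(">lll", 0, oldlen, newlen) + tombstone

    hlen = struct.calcsize(">lll")
    marker = b'\x01\ncensored:'
    assert len(delta) - hlen == newlen
    assert delta[hlen:hlen + len(marker)] == marker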
2156 def getstrippoint(self, minlink):
2148 def getstrippoint(self, minlink):
2157 """find the minimum rev that must be stripped to strip the linkrev
2149 """find the minimum rev that must be stripped to strip the linkrev
2158
2150
2159 Returns a tuple containing the minimum rev and a set of all revs that
2151 Returns a tuple containing the minimum rev and a set of all revs that
2160 have linkrevs that will be broken by this strip.
2152 have linkrevs that will be broken by this strip.
2161 """
2153 """
2162 brokenrevs = set()
2154 brokenrevs = set()
2163 strippoint = len(self)
2155 strippoint = len(self)
2164
2156
2165 heads = {}
2157 heads = {}
2166 futurelargelinkrevs = set()
2158 futurelargelinkrevs = set()
2167 for head in self.headrevs():
2159 for head in self.headrevs():
2168 headlinkrev = self.linkrev(head)
2160 headlinkrev = self.linkrev(head)
2169 heads[head] = headlinkrev
2161 heads[head] = headlinkrev
2170 if headlinkrev >= minlink:
2162 if headlinkrev >= minlink:
2171 futurelargelinkrevs.add(headlinkrev)
2163 futurelargelinkrevs.add(headlinkrev)
2172
2164
2173 # This algorithm involves walking down the rev graph, starting at the
2165 # This algorithm involves walking down the rev graph, starting at the
2174 # heads. Since the revs are topologically sorted according to linkrev,
2166 # heads. Since the revs are topologically sorted according to linkrev,
2175 # once all head linkrevs are below the minlink, we know there are
2167 # once all head linkrevs are below the minlink, we know there are
2176 # no more revs that could have a linkrev greater than minlink.
2168 # no more revs that could have a linkrev greater than minlink.
2177 # So we can stop walking.
2169 # So we can stop walking.
2178 while futurelargelinkrevs:
2170 while futurelargelinkrevs:
2179 strippoint -= 1
2171 strippoint -= 1
2180 linkrev = heads.pop(strippoint)
2172 linkrev = heads.pop(strippoint)
2181
2173
2182 if linkrev < minlink:
2174 if linkrev < minlink:
2183 brokenrevs.add(strippoint)
2175 brokenrevs.add(strippoint)
2184 else:
2176 else:
2185 futurelargelinkrevs.remove(linkrev)
2177 futurelargelinkrevs.remove(linkrev)
2186
2178
2187 for p in self.parentrevs(strippoint):
2179 for p in self.parentrevs(strippoint):
2188 if p != nullrev:
2180 if p != nullrev:
2189 plinkrev = self.linkrev(p)
2181 plinkrev = self.linkrev(p)
2190 heads[p] = plinkrev
2182 heads[p] = plinkrev
2191 if plinkrev >= minlink:
2183 if plinkrev >= minlink:
2192 futurelargelinkrevs.add(plinkrev)
2184 futurelargelinkrevs.add(plinkrev)
2193
2185
2194 return strippoint, brokenrevs
2186 return strippoint, brokenrevs
2195
2187
2196 def strip(self, minlink, transaction):
2188 def strip(self, minlink, transaction):
2197 """truncate the revlog on the first revision with a linkrev >= minlink
2189 """truncate the revlog on the first revision with a linkrev >= minlink
2198
2190
2199 This function is called when we're stripping revision minlink and
2191 This function is called when we're stripping revision minlink and
2200 its descendants from the repository.
2192 its descendants from the repository.
2201
2193
2202 We have to remove all revisions with linkrev >= minlink, because
2194 We have to remove all revisions with linkrev >= minlink, because
2203 the equivalent changelog revisions will be renumbered after the
2195 the equivalent changelog revisions will be renumbered after the
2204 strip.
2196 strip.
2205
2197
2206 So we truncate the revlog on the first of these revisions, and
2198 So we truncate the revlog on the first of these revisions, and
2207 trust that the caller has saved the revisions that shouldn't be
2199 trust that the caller has saved the revisions that shouldn't be
2208 removed and that it'll re-add them after this truncation.
2200 removed and that it'll re-add them after this truncation.
2209 """
2201 """
2210 if len(self) == 0:
2202 if len(self) == 0:
2211 return
2203 return
2212
2204
2213 rev, _ = self.getstrippoint(minlink)
2205 rev, _ = self.getstrippoint(minlink)
2214 if rev == len(self):
2206 if rev == len(self):
2215 return
2207 return
2216
2208
2217 # first truncate the files on disk
2209 # first truncate the files on disk
2218 end = self.start(rev)
2210 end = self.start(rev)
2219 if not self._inline:
2211 if not self._inline:
2220 transaction.add(self.datafile, end)
2212 transaction.add(self.datafile, end)
2221 end = rev * self._io.size
2213 end = rev * self._io.size
2222 else:
2214 else:
2223 end += rev * self._io.size
2215 end += rev * self._io.size
2224
2216
2225 transaction.add(self.indexfile, end)
2217 transaction.add(self.indexfile, end)
2226
2218
2227 # then reset internal state in memory to forget those revisions
2219 # then reset internal state in memory to forget those revisions
2228 self._cache = None
2220 self._cache = None
2229 self._chaininfocache = {}
2221 self._chaininfocache = {}
2230 self._chunkclear()
2222 self._chunkclear()
2231 for x in pycompat.xrange(rev, len(self)):
2223 for x in pycompat.xrange(rev, len(self)):
2232 del self.nodemap[self.node(x)]
2224 del self.nodemap[self.node(x)]
2233
2225
2234 del self.index[rev:-1]
2226 del self.index[rev:-1]
2235 self._nodepos = None
2227 self._nodepos = None
2236
2228
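The truncation offsets computed above reduce to simple arithmetic. With hypothetical numbers (64-byte index entries, the first stripped revision's data starting at byte 5000):

    iosize = 64                 # size of one index entry
    rev, datastart = 3, 5000    # made-up strip point
    separate_index_end = rev * iosize            # .i truncation, non-inline case
    inline_index_end = datastart + rev * iosize  # inline .i interleaves data
    assert (separate_index_end, inline_index_end) == (192, 5192)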
2237 def checksize(self):
2229 def checksize(self):
2238 expected = 0
2230 expected = 0
2239 if len(self):
2231 if len(self):
2240 expected = max(0, self.end(len(self) - 1))
2232 expected = max(0, self.end(len(self) - 1))
2241
2233
2242 try:
2234 try:
2243 with self._datafp() as f:
2235 with self._datafp() as f:
2244 f.seek(0, 2)
2236 f.seek(0, 2)
2245 actual = f.tell()
2237 actual = f.tell()
2246 dd = actual - expected
2238 dd = actual - expected
2247 except IOError as inst:
2239 except IOError as inst:
2248 if inst.errno != errno.ENOENT:
2240 if inst.errno != errno.ENOENT:
2249 raise
2241 raise
2250 dd = 0
2242 dd = 0
2251
2243
2252 try:
2244 try:
2253 f = self.opener(self.indexfile)
2245 f = self.opener(self.indexfile)
2254 f.seek(0, 2)
2246 f.seek(0, 2)
2255 actual = f.tell()
2247 actual = f.tell()
2256 f.close()
2248 f.close()
2257 s = self._io.size
2249 s = self._io.size
2258 i = max(0, actual // s)
2250 i = max(0, actual // s)
2259 di = actual - (i * s)
2251 di = actual - (i * s)
2260 if self._inline:
2252 if self._inline:
2261 databytes = 0
2253 databytes = 0
2262 for r in self:
2254 for r in self:
2263 databytes += max(0, self.length(r))
2255 databytes += max(0, self.length(r))
2264 dd = 0
2256 dd = 0
2265 di = actual - len(self) * s - databytes
2257 di = actual - len(self) * s - databytes
2266 except IOError as inst:
2258 except IOError as inst:
2267 if inst.errno != errno.ENOENT:
2259 if inst.errno != errno.ENOENT:
2268 raise
2260 raise
2269 di = 0
2261 di = 0
2270
2262
2271 return (dd, di)
2263 return (dd, di)
2272
2264
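In the same spirit, the dd/di values returned by checksize() are plain differences between on-disk sizes and what the index entries predict; with hypothetical numbers:

    iosize = 64
    expected_data, actual_data = 4096, 4100
    dd = actual_data - expected_data               # 4 stray data bytes
    actual_index = 10 * iosize + 3
    di = actual_index - (actual_index // iosize) * iosize
    assert (dd, di) == (4, 3)                      # 3 stray index bytes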
2273 def files(self):
2265 def files(self):
2274 res = [self.indexfile]
2266 res = [self.indexfile]
2275 if not self._inline:
2267 if not self._inline:
2276 res.append(self.datafile)
2268 res.append(self.datafile)
2277 return res
2269 return res
2278
2270
2279 def emitrevisiondeltas(self, requests):
2271 def emitrevisiondeltas(self, requests):
2280 frev = self.rev
2272 frev = self.rev
2281
2273
2282 prevrev = None
2274 prevrev = None
2283 for request in requests:
2275 for request in requests:
2284 node = request.node
2276 node = request.node
2285 rev = frev(node)
2277 rev = frev(node)
2286
2278
2287 if prevrev is None:
2279 if prevrev is None:
2288 prevrev = self.index[rev][5]
2280 prevrev = self.index[rev][5]
2289
2281
2290 # Requesting a full revision.
2282 # Requesting a full revision.
2291 if request.basenode == nullid:
2283 if request.basenode == nullid:
2292 baserev = nullrev
2284 baserev = nullrev
2293 # Requesting an explicit revision.
2285 # Requesting an explicit revision.
2294 elif request.basenode is not None:
2286 elif request.basenode is not None:
2295 baserev = frev(request.basenode)
2287 baserev = frev(request.basenode)
2296 # Allowing us to choose.
2288 # Allowing us to choose.
2297 else:
2289 else:
2298 p1rev, p2rev = self.parentrevs(rev)
2290 p1rev, p2rev = self.parentrevs(rev)
2299 deltaparentrev = self.deltaparent(rev)
2291 deltaparentrev = self.deltaparent(rev)
2300
2292
2301 # Avoid sending full revisions when delta parent is null. Pick
2293 # Avoid sending full revisions when delta parent is null. Pick
2302 # prev in that case. It's tempting to pick p1 in this case, as
2294 # prev in that case. It's tempting to pick p1 in this case, as
2303 # p1 will be smaller in the common case. However, computing a
2295 # p1 will be smaller in the common case. However, computing a
2304 # delta against p1 may require resolving the raw text of p1,
2296 # delta against p1 may require resolving the raw text of p1,
2305 # which could be expensive. The revlog caches should have prev
2297 # which could be expensive. The revlog caches should have prev
2306 # cached, meaning less CPU for delta generation. There is
2298 # cached, meaning less CPU for delta generation. There is
2307 # likely room to add a flag and/or config option to control this
2299 # likely room to add a flag and/or config option to control this
2308 # behavior.
2300 # behavior.
2309 if deltaparentrev == nullrev and self._storedeltachains:
2301 if deltaparentrev == nullrev and self._storedeltachains:
2310 baserev = prevrev
2302 baserev = prevrev
2311
2303
2312 # Revlog is configured to use full snapshot for a reason.
2304 # Revlog is configured to use full snapshot for a reason.
2313 # Stick to full snapshot.
2305 # Stick to full snapshot.
2314 elif deltaparentrev == nullrev:
2306 elif deltaparentrev == nullrev:
2315 baserev = nullrev
2307 baserev = nullrev
2316
2308
2317 # Pick previous when we can't be sure the base is available
2309 # Pick previous when we can't be sure the base is available
2318 # on consumer.
2310 # on consumer.
2319 elif deltaparentrev not in (p1rev, p2rev, prevrev):
2311 elif deltaparentrev not in (p1rev, p2rev, prevrev):
2320 baserev = prevrev
2312 baserev = prevrev
2321 else:
2313 else:
2322 baserev = deltaparentrev
2314 baserev = deltaparentrev
2323
2315
2324 if baserev != nullrev and not self.candelta(baserev, rev):
2316 if baserev != nullrev and not self.candelta(baserev, rev):
2325 baserev = nullrev
2317 baserev = nullrev
2326
2318
2327 revision = None
2319 revision = None
2328 delta = None
2320 delta = None
2329 baserevisionsize = None
2321 baserevisionsize = None
2330
2322
2331 if self.iscensored(baserev) or self.iscensored(rev):
2323 if self.iscensored(baserev) or self.iscensored(rev):
2332 try:
2324 try:
2333 revision = self.revision(node, raw=True)
2325 revision = self.revision(node, raw=True)
2334 except error.CensoredNodeError as e:
2326 except error.CensoredNodeError as e:
2335 revision = e.tombstone
2327 revision = e.tombstone
2336
2328
2337 if baserev != nullrev:
2329 if baserev != nullrev:
2338 baserevisionsize = self.rawsize(baserev)
2330 baserevisionsize = self.rawsize(baserev)
2339
2331
2340 elif baserev == nullrev:
2332 elif baserev == nullrev:
2341 revision = self.revision(node, raw=True)
2333 revision = self.revision(node, raw=True)
2342 else:
2334 else:
2343 delta = self.revdiff(baserev, rev)
2335 delta = self.revdiff(baserev, rev)
2344
2336
2345 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
2337 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
2346
2338
2347 yield revlogrevisiondelta(
2339 yield revlogrevisiondelta(
2348 node=node,
2340 node=node,
2349 p1node=request.p1node,
2341 p1node=request.p1node,
2350 p2node=request.p2node,
2342 p2node=request.p2node,
2351 linknode=request.linknode,
2343 linknode=request.linknode,
2352 basenode=self.node(baserev),
2344 basenode=self.node(baserev),
2353 flags=self.flags(rev) | extraflags,
2345 flags=self.flags(rev) | extraflags,
2354 baserevisionsize=baserevisionsize,
2346 baserevisionsize=baserevisionsize,
2355 revision=revision,
2347 revision=revision,
2356 delta=delta)
2348 delta=delta)
2357
2349
2358 prevrev = rev
2350 prevrev = rev
2359
2351
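The base-selection policy embedded in emitrevisiondeltas() above distills into a small pure function (a restatement for readability; the final candelta() guard is left out):

    def choose_baserev(deltaparentrev, p1rev, p2rev, prevrev,
                       storedeltachains, nullrev=-1):
        if deltaparentrev == nullrev and storedeltachains:
            return prevrev        # prev is likely cached; avoid a full text
        if deltaparentrev == nullrev:
            return nullrev        # the revlog insisted on a full snapshot
        if deltaparentrev not in (p1rev, p2rev, prevrev):
            return prevrev        # the receiver may lack the stored base
        return deltaparentrev     # safe to reuse the stored delta

    assert choose_baserev(-1, 0, -1, 2, True) == 2
    assert choose_baserev(-1, 0, -1, 2, False) == -1
    assert choose_baserev(5, 0, 1, 4, True) == 4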
2360 DELTAREUSEALWAYS = 'always'
2352 DELTAREUSEALWAYS = 'always'
2361 DELTAREUSESAMEREVS = 'samerevs'
2353 DELTAREUSESAMEREVS = 'samerevs'
2362 DELTAREUSENEVER = 'never'
2354 DELTAREUSENEVER = 'never'
2363
2355
2364 DELTAREUSEFULLADD = 'fulladd'
2356 DELTAREUSEFULLADD = 'fulladd'
2365
2357
2366 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2358 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2367
2359
2368 def clone(self, tr, destrevlog, addrevisioncb=None,
2360 def clone(self, tr, destrevlog, addrevisioncb=None,
2369 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2361 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2370 """Copy this revlog to another, possibly with format changes.
2362 """Copy this revlog to another, possibly with format changes.
2371
2363
2372 The destination revlog will contain the same revisions and nodes.
2364 The destination revlog will contain the same revisions and nodes.
2373 However, it may not be bit-for-bit identical due to e.g. delta encoding
2365 However, it may not be bit-for-bit identical due to e.g. delta encoding
2374 differences.
2366 differences.
2375
2367
2376 The ``deltareuse`` argument controls how deltas from the existing revlog
2368 The ``deltareuse`` argument controls how deltas from the existing revlog
2377 are preserved in the destination revlog. The argument can have the
2369 are preserved in the destination revlog. The argument can have the
2378 following values:
2370 following values:
2379
2371
2380 DELTAREUSEALWAYS
2372 DELTAREUSEALWAYS
2381 Deltas will always be reused (if possible), even if the destination
2373 Deltas will always be reused (if possible), even if the destination
2382 revlog would not select the same revisions for the delta. This is the
2374 revlog would not select the same revisions for the delta. This is the
2383 fastest mode of operation.
2375 fastest mode of operation.
2384 DELTAREUSESAMEREVS
2376 DELTAREUSESAMEREVS
2385 Deltas will be reused if the destination revlog would pick the same
2377 Deltas will be reused if the destination revlog would pick the same
2386 revisions for the delta. This mode strikes a balance between speed
2378 revisions for the delta. This mode strikes a balance between speed
2387 and optimization.
2379 and optimization.
2388 DELTAREUSENEVER
2380 DELTAREUSENEVER
2389 Deltas will never be reused. This is the slowest mode of execution.
2381 Deltas will never be reused. This is the slowest mode of execution.
2390 This mode can be used to recompute deltas (e.g. if the diff/delta
2382 This mode can be used to recompute deltas (e.g. if the diff/delta
2391 algorithm changes).
2383 algorithm changes).
2392
2384
2393 Delta computation can be slow, so the choice of delta reuse policy can
2385 Delta computation can be slow, so the choice of delta reuse policy can
2394 significantly affect run time.
2386 significantly affect run time.
2395
2387
2396 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2388 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2397 two extremes. Deltas will be reused if they are appropriate. But if the
2389 two extremes. Deltas will be reused if they are appropriate. But if the
2398 delta could choose a better revision, it will do so. This means if you
2390 delta could choose a better revision, it will do so. This means if you
2399 are converting a non-generaldelta revlog to a generaldelta revlog,
2391 are converting a non-generaldelta revlog to a generaldelta revlog,
2400 deltas will be recomputed if the delta's parent isn't a parent of the
2392 deltas will be recomputed if the delta's parent isn't a parent of the
2401 revision.
2393 revision.
2402
2394
2403 In addition to the delta policy, the ``deltabothparents`` argument
2395 In addition to the delta policy, the ``deltabothparents`` argument
2404 controls whether to compute deltas against both parents for merges.
2396 controls whether to compute deltas against both parents for merges.
2405 When it is None, the destination revlog's current setting is kept.
2397 When it is None, the destination revlog's current setting is kept.
2406 """
2398 """
2407 if deltareuse not in self.DELTAREUSEALL:
2399 if deltareuse not in self.DELTAREUSEALL:
2408 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2400 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2409
2401
2410 if len(destrevlog):
2402 if len(destrevlog):
2411 raise ValueError(_('destination revlog is not empty'))
2403 raise ValueError(_('destination revlog is not empty'))
2412
2404
2413 if getattr(self, 'filteredrevs', None):
2405 if getattr(self, 'filteredrevs', None):
2414 raise ValueError(_('source revlog has filtered revisions'))
2406 raise ValueError(_('source revlog has filtered revisions'))
2415 if getattr(destrevlog, 'filteredrevs', None):
2407 if getattr(destrevlog, 'filteredrevs', None):
2416 raise ValueError(_('destination revlog has filtered revisions'))
2408 raise ValueError(_('destination revlog has filtered revisions'))
2417
2409
2418 # lazydeltabase controls whether to reuse a cached delta, if possible.
2410 # lazydeltabase controls whether to reuse a cached delta, if possible.
2419 oldlazydeltabase = destrevlog._lazydeltabase
2411 oldlazydeltabase = destrevlog._lazydeltabase
2420 oldamd = destrevlog._deltabothparents
2412 oldamd = destrevlog._deltabothparents
2421
2413
2422 try:
2414 try:
2423 if deltareuse == self.DELTAREUSEALWAYS:
2415 if deltareuse == self.DELTAREUSEALWAYS:
2424 destrevlog._lazydeltabase = True
2416 destrevlog._lazydeltabase = True
2425 elif deltareuse == self.DELTAREUSESAMEREVS:
2417 elif deltareuse == self.DELTAREUSESAMEREVS:
2426 destrevlog._lazydeltabase = False
2418 destrevlog._lazydeltabase = False
2427
2419
2428 destrevlog._deltabothparents = deltabothparents or oldamd
2420 destrevlog._deltabothparents = deltabothparents or oldamd
2429
2421
2430 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2422 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2431 self.DELTAREUSESAMEREVS)
2423 self.DELTAREUSESAMEREVS)
2432
2424
2433 deltacomputer = deltautil.deltacomputer(destrevlog)
2425 deltacomputer = deltautil.deltacomputer(destrevlog)
2434 index = self.index
2426 index = self.index
2435 for rev in self:
2427 for rev in self:
2436 entry = index[rev]
2428 entry = index[rev]
2437
2429
2438 # Some classes override linkrev to take filtered revs into
2430 # Some classes override linkrev to take filtered revs into
2439 # account. Use raw entry from index.
2431 # account. Use raw entry from index.
2440 flags = entry[0] & 0xffff
2432 flags = entry[0] & 0xffff
2441 linkrev = entry[4]
2433 linkrev = entry[4]
2442 p1 = index[entry[5]][7]
2434 p1 = index[entry[5]][7]
2443 p2 = index[entry[6]][7]
2435 p2 = index[entry[6]][7]
2444 node = entry[7]
2436 node = entry[7]
2445
2437
2446 # (Possibly) reuse the delta from the revlog if allowed and
2438 # (Possibly) reuse the delta from the revlog if allowed and
2447 # the revlog chunk is a delta.
2439 # the revlog chunk is a delta.
2448 cachedelta = None
2440 cachedelta = None
2449 rawtext = None
2441 rawtext = None
2450 if populatecachedelta:
2442 if populatecachedelta:
2451 dp = self.deltaparent(rev)
2443 dp = self.deltaparent(rev)
2452 if dp != nullrev:
2444 if dp != nullrev:
2453 cachedelta = (dp, bytes(self._chunk(rev)))
2445 cachedelta = (dp, bytes(self._chunk(rev)))
2454
2446
2455 if not cachedelta:
2447 if not cachedelta:
2456 rawtext = self.revision(rev, raw=True)
2448 rawtext = self.revision(rev, raw=True)
2457
2449
2458
2450
2459 if deltareuse == self.DELTAREUSEFULLADD:
2451 if deltareuse == self.DELTAREUSEFULLADD:
2460 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2452 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2461 cachedelta=cachedelta,
2453 cachedelta=cachedelta,
2462 node=node, flags=flags,
2454 node=node, flags=flags,
2463 deltacomputer=deltacomputer)
2455 deltacomputer=deltacomputer)
2464 else:
2456 else:
2465 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2457 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2466 checkambig=False)
2458 checkambig=False)
2467 dfh = None
2459 dfh = None
2468 if not destrevlog._inline:
2460 if not destrevlog._inline:
2469 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2461 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2470 try:
2462 try:
2471 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2463 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2472 p2, flags, cachedelta, ifh, dfh,
2464 p2, flags, cachedelta, ifh, dfh,
2473 deltacomputer=deltacomputer)
2465 deltacomputer=deltacomputer)
2474 finally:
2466 finally:
2475 if dfh:
2467 if dfh:
2476 dfh.close()
2468 dfh.close()
2477 ifh.close()
2469 ifh.close()
2478
2470
2479 if addrevisioncb:
2471 if addrevisioncb:
2480 addrevisioncb(self, rev, node)
2472 addrevisioncb(self, rev, node)
2481 finally:
2473 finally:
2482 destrevlog._lazydeltabase = oldlazydeltabase
2474 destrevlog._lazydeltabase = oldlazydeltabase
2483 destrevlog._deltabothparents = oldamd
2475 destrevlog._deltabothparents = oldamd
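A hedged call shape for clone(); the transaction and both revlogs must come from a live repository, so the lines stay commented out:

    # with repo.transaction('clonerevlog') as tr:
    #     srcrevlog.clone(tr, destrevlog,
    #                     deltareuse=srcrevlog.DELTAREUSESAMEREVS,
    #                     deltabothparents=None)  # keep the destination default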
@@ -1,745 +1,763 @@
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import heapq
12 import heapq
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 )
33 )
34
34
35 RevlogError = error.RevlogError
35 RevlogError = error.RevlogError
36 CensoredNodeError = error.CensoredNodeError
36 CensoredNodeError = error.CensoredNodeError
37
37
38 # maximum <delta-chain-data>/<revision-text-length> ratio
38 # maximum <delta-chain-data>/<revision-text-length> ratio
39 LIMIT_DELTA2TEXT = 2
39 LIMIT_DELTA2TEXT = 2
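In concrete terms (hypothetical sizes): with the ratio capped at 2, a 100-byte revision text tolerates at most 200 bytes of cumulative delta-chain data before a candidate chain is considered too expensive by the quality checks elsewhere in this module:

    LIMIT_DELTA2TEXT = 2
    textlen = 100
    max_chain_data = LIMIT_DELTA2TEXT * textlen
    assert max_chain_data == 200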
40
40
41 class _testrevlog(object):
41 class _testrevlog(object):
42 """minimalist fake revlog to use in doctests"""
42 """minimalist fake revlog to use in doctests"""
43
43
44 def __init__(self, data, density=0.5, mingap=0):
44 def __init__(self, data, density=0.5, mingap=0):
45 """data is an list of revision payload boundaries"""
45 """data is an list of revision payload boundaries"""
46 self._data = data
46 self._data = data
47 self._srdensitythreshold = density
47 self._srdensitythreshold = density
48 self._srmingapsize = mingap
48 self._srmingapsize = mingap
49
49
50 def start(self, rev):
50 def start(self, rev):
51 if rev == 0:
51 if rev == 0:
52 return 0
52 return 0
53 return self._data[rev - 1]
53 return self._data[rev - 1]
54
54
55 def end(self, rev):
55 def end(self, rev):
56 return self._data[rev]
56 return self._data[rev]
57
57
58 def length(self, rev):
58 def length(self, rev):
59 return self.end(rev) - self.start(rev)
59 return self.end(rev) - self.start(rev)
60
60
61 def __len__(self):
61 def __len__(self):
62 return len(self._data)
62 return len(self._data)
63
63
64 def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
64 def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
65 """slice revs to reduce the amount of unrelated data to be read from disk.
65 """slice revs to reduce the amount of unrelated data to be read from disk.
66
66
67 ``revs`` is sliced into groups that should be read in one time.
67 ``revs`` is sliced into groups that should be read in one time.
68 Assume that revs are sorted.
68 Assume that revs are sorted.
69
69
70 The initial chunk is sliced until the overall density (payload/chunks-span
70 The initial chunk is sliced until the overall density (payload/chunks-span
71 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
71 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
72 `revlog._srmingapsize` is skipped.
72 `revlog._srmingapsize` is skipped.
73
73
74 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
74 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
75 For consistency with other slicing choices, this limit won't go lower than
75 For consistency with other slicing choices, this limit won't go lower than
76 `revlog._srmingapsize`.
76 `revlog._srmingapsize`.
77
77
78 If individual revision chunks are larger than this limit, they will still
78 If individual revision chunks are larger than this limit, they will still
79 be yielded individually.
79 be yielded individually.
80
80
81 >>> revlog = _testrevlog([
81 >>> revlog = _testrevlog([
82 ... 5, #00 (5)
82 ... 5, #00 (5)
83 ... 10, #01 (5)
83 ... 10, #01 (5)
84 ... 12, #02 (2)
84 ... 12, #02 (2)
85 ... 12, #03 (empty)
85 ... 12, #03 (empty)
86 ... 27, #04 (15)
86 ... 27, #04 (15)
87 ... 31, #05 (4)
87 ... 31, #05 (4)
88 ... 31, #06 (empty)
88 ... 31, #06 (empty)
89 ... 42, #07 (11)
89 ... 42, #07 (11)
90 ... 47, #08 (5)
90 ... 47, #08 (5)
91 ... 47, #09 (empty)
91 ... 47, #09 (empty)
92 ... 48, #10 (1)
92 ... 48, #10 (1)
93 ... 51, #11 (3)
93 ... 51, #11 (3)
94 ... 74, #12 (23)
94 ... 74, #12 (23)
95 ... 85, #13 (11)
95 ... 85, #13 (11)
96 ... 86, #14 (1)
96 ... 86, #14 (1)
97 ... 91, #15 (5)
97 ... 91, #15 (5)
98 ... ])
98 ... ])
99
99
100 >>> list(slicechunk(revlog, list(range(16))))
100 >>> list(slicechunk(revlog, list(range(16))))
101 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
101 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
102 >>> list(slicechunk(revlog, [0, 15]))
102 >>> list(slicechunk(revlog, [0, 15]))
103 [[0], [15]]
103 [[0], [15]]
104 >>> list(slicechunk(revlog, [0, 11, 15]))
104 >>> list(slicechunk(revlog, [0, 11, 15]))
105 [[0], [11], [15]]
105 [[0], [11], [15]]
106 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
106 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
107 [[0], [11, 13, 15]]
107 [[0], [11, 13, 15]]
108 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
108 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
109 [[1, 2], [5, 8, 10, 11], [14]]
109 [[1, 2], [5, 8, 10, 11], [14]]
110
110
111 Slicing with a maximum chunk size
111 Slicing with a maximum chunk size
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
113 [[0], [11], [13], [15]]
113 [[0], [11], [13], [15]]
114 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
114 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
115 [[0], [11], [13, 15]]
115 [[0], [11], [13, 15]]
116 """
116 """
117 if targetsize is not None:
117 if targetsize is not None:
118 targetsize = max(targetsize, revlog._srmingapsize)
118 targetsize = max(targetsize, revlog._srmingapsize)
119 # targetsize should not be specified when evaluating delta candidates:
119 # targetsize should not be specified when evaluating delta candidates:
120 # * targetsize is used to ensure we stay within specification when reading,
120 # * targetsize is used to ensure we stay within specification when reading,
121 # * deltainfo is used to pick a good delta chain when writing.
121 # * deltainfo is used to pick a good delta chain when writing.
122 if not (deltainfo is None or targetsize is None):
122 if not (deltainfo is None or targetsize is None):
123 msg = 'cannot use `targetsize` with a `deltainfo`'
123 msg = 'cannot use `targetsize` with a `deltainfo`'
124 raise error.ProgrammingError(msg)
124 raise error.ProgrammingError(msg)
125 for chunk in _slicechunktodensity(revlog, revs,
125 for chunk in _slicechunktodensity(revlog, revs,
126 deltainfo,
126 deltainfo,
127 revlog._srdensitythreshold,
127 revlog._srdensitythreshold,
128 revlog._srmingapsize):
128 revlog._srmingapsize):
129 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
129 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
130 yield subchunk
130 yield subchunk
131
131
132 def _slicechunktosize(revlog, revs, targetsize=None):
132 def _slicechunktosize(revlog, revs, targetsize=None):
133 """slice revs to match the target size
133 """slice revs to match the target size
134
134
135 This is intended to be used on chunks that density slicing selected but
135 This is intended to be used on chunks that density slicing selected but
136 that are still too large compared to the read guarantee of revlogs. This
136 that are still too large compared to the read guarantee of revlogs. This
137 might happen when the "minimal gap size" interrupted the slicing or when
137 might happen when the "minimal gap size" interrupted the slicing or when
138 chains are built in a way that creates large blocks next to each other.
138 chains are built in a way that creates large blocks next to each other.
139
139
140 >>> revlog = _testrevlog([
140 >>> revlog = _testrevlog([
    ... 3, #0 (3)
    ... 5, #1 (2)
    ... 6, #2 (1)
    ... 8, #3 (2)
    ... 8, #4 (empty)
    ... 11, #5 (3)
    ... 12, #6 (1)
    ... 13, #7 (1)
    ... 14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    Cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            endrevidx = idx
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    yield _trimchunk(revlog, revs, startrevidx)

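# For illustration only: the greedy scan above, restated over plain
# (start, end) byte offsets so it can be run without a revlog object. The
# name and signature are hypothetical and not part of this module's API.
def _slicebysize_sketch(offsets, targetsize):
    """Slice indices of ``offsets`` into groups spanning <= targetsize bytes.

    >>> list(_slicebysize_sketch([(0, 3), (3, 5), (5, 6)], 3))
    [[0], [1, 2]]
    """
    group = [0]
    groupstart = offsets[0][0]
    for i in range(1, len(offsets)):
        if offsets[i][1] - groupstart <= targetsize:
            # still within budget, extend the current group
            group.append(i)
        else:
            # budget exceeded, emit the group and start a new one
            yield group
            group = [i]
            groupstart = offsets[i][0]
    yield group
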
def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should each be read in a single read
    operation. Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ... 5, #00 (5)
    ... 10, #01 (5)
    ... 12, #02 (2)
    ... 12, #03 (empty)
    ... 27, #04 (15)
    ... 31, #05 (4)
    ... 31, #06 (empty)
    ... 42, #07 (11)
    ... 47, #08 (5)
    ... 47, #09 (empty)
    ... 48, #10 (1)
    ... 51, #11 (3)
    ... 74, #12 (23)
    ... 85, #13 (11)
    ... 86, #14 (1)
    ... 91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

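    # when a pending revision is described by ``deltainfo``, append its
    # would-be revision number so that it takes part in the gap computation
    # and the slicing below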
    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives so that the heap keeps them
        # sorted in decreasing order
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

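# A self-contained illustration of the density bookkeeping above, assuming
# plain integers instead of a revlog (the helper name is hypothetical):
# dropping the largest gaps first is what raises payload/readdata fastest.
def _densityafterdrops_sketch(payload, span, gaps):
    """Yield the density as the largest gaps are skipped one by one.

    >>> [round(d, 2) for d in _densityafterdrops_sketch(20, 50, [15, 10, 5])]
    [0.4, 0.57, 0.8, 1.0]
    """
    readdata = span
    yield payload / float(readdata)
    for gap in sorted(gaps, reverse=True):
        # skipping a gap shrinks the amount of data actually read
        readdata -= gap
        yield payload / float(readdata) if readdata > 0 else 1.0
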
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ... 5, #0
    ... 10, #1
    ... 12, #2
    ... 12, #3 (empty)
    ... 17, #4
    ... 21, #5
    ... 21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ... 5, #0
    ... 10, #1
    ... 12, #2
    ... 12, #3 (empty)
    ... 17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
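    # if ``revs`` ends with a revision that is not in the revlog yet, its
    # data is described by ``deltainfo`` and would be appended at the
    # current end of the data file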
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is the rawtext before being changed by flag processors,
        # which is equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise RevlogError(_('node %s is not censored') % expectednode)
    except CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

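# For reference, the "replace the entire base" delta recognized above is a
# single diff hunk covering the whole base. A minimal sketch of building such
# a delta, assuming the (start, end, newlength) big-endian header layout used
# by mdiff (the helper name is hypothetical):
def _fullreplacedelta_sketch(basesize, newtext):
    """Build a delta replacing ``basesize`` bytes of base with ``newtext``.

    >>> _fullreplacedelta_sketch(4, b'abc')[:12] == struct.pack(">lll", 0, 4, 3)
    True
    """
    # one hunk: replace bytes [0, basesize) of the base with the new text
    return struct.pack(">lll", 0, basesize, len(newtext)) + newtext
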
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()

def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    if revlog._sparserevlog:
        # As sparse-read will be used, we can consider that the distance,
        # instead of being the span of the whole chunk,
        # is the span of the largest read chunk
        base = deltainfo.base

        if base != nullrev:
            deltachain = revlog._deltachain(base)[0]
        else:
            deltachain = []

        # search for the first non-snapshot revision
        for idx, r in enumerate(deltachain):
            if not revlog.issnapshot(r):
                break
        deltachain = deltachain[idx:]
        chunks = slicechunk(revlog, deltachain, deltainfo)
        all_span = [segmentspan(revlog, revs, deltainfo)
                    for revs in chunks]
        distance = max(all_span)
    else:
        distance = deltainfo.distance

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = distance # ensure the conditional below passes
    maxdist = max(maxdist, defaultmax)
    if revlog._sparserevlog and maxdist < revlog._srmingapsize:
        # In multiple places, we are ignoring irrelevant data ranges below a
        # certain size. We also apply this tradeoff here and relax the span
        # constraint for small enough content.
        maxdist = revlog._srmingapsize

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    if maxdist < distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulative payload size:
    #
    # If the sum of the deltas gets larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of deltas in the chain gets too high.
    if (revlog._maxchainlen
            and revlog._maxchainlen < deltainfo.chainlen):
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exists to prevent an endless chain of intermediate deltas
    # from being created.
    if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
        return False

    # bad delta if the new intermediate snapshot is larger than the previous
    # snapshot
    if (deltainfo.snapshotdepth
            and revlog.length(deltainfo.base) < deltainfo.deltalen):
        return False

    return True

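# The scalar core of the checks above, condensed into a standalone predicate
# for illustration. The name and signature are hypothetical; it takes
# precomputed values instead of revlog/deltainfo objects and reuses the
# module's LIMIT_DELTA2TEXT constant:
def _isgooddelta_sketch(distance, deltalen, chainlen, payload, textlen,
                        maxdist, maxchainlen):
    """
    >>> _isgooddelta_sketch(100, 10, 3, 30, 50, 200, 10)
    True
    >>> _isgooddelta_sketch(300, 10, 3, 30, 50, 200, 10)  # reads too much
    False
    >>> _isgooddelta_sketch(100, 60, 3, 30, 50, 200, 10)  # delta > fulltext
    False
    """
    if maxdist < distance:                     # read span too large
        return False
    if textlen < deltalen:                     # delta bigger than the fulltext
        return False
    if textlen * LIMIT_DELTA2TEXT < payload:   # chain payload too large
        return False
    if maxchainlen and maxchainlen < chainlen: # chain too long
        return False
    return True
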
class deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides the revisions that are worth being diffed against,
        grouped by level of easiness.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog._storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazily tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._deltabothparents and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # the other approaches failed; try against prev to hopefully
                # save us a fulltext.
                yield (prev,)
                tested.add(prev)

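    # The preference order above, flattened into data for illustration. A
    # hedged sketch (hypothetical helper, not used by this class) assuming
    # generaldelta, a usable cached base, two real parents, and
    # ``delta-both-parents`` disabled:
    @staticmethod
    def _candidateorder_sketch(cachedbase, p1r, p2r, prev):
        """
        >>> deltacomputer._candidateorder_sketch(7, 5, 6, 9)
        [(7,), (6,), (5,), (9,)]
        """
        groups = [(cachedbase,)]
        tested = {cachedbase}
        parents = sorted(p for p in (p1r, p2r) if p not in tested)
        if parents:
            # closest parent (max rev) first, then the other one
            groups.append((parents[-1],))
            if len(parents) == 2:
                groups.append((parents[0],))
            tested.update(parents)
        if prev not in tested:
            groups.append((prev,))
        return groups
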
    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
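        # worked example of the distance above (illustrative numbers only):
        # with the chain base starting at byte 100, the revlog currently
        # ending at byte 260, and a 15-byte delta appended at the end, a
        # reader must cover 15 + 260 - 100 = 175 bytes.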
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
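        """Build the delta info for storing a revision as a full snapshot.

        The revision is stored as a compressed fulltext: the delta base is
        the revision itself, the chain has length one, and the snapshot
        depth is zero.
        """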
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        deltainfo = None
        deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            # filter out delta bases that will never produce a good delta
            candidaterevs = [r for r in candidaterevs
                             if self.revlog.length(r) <= deltas_limit]
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # skip over empty deltas (no need to include them in a chain)
                while candidaterev != nullrev and not deltalength(candidaterev):
                    candidaterev = deltaparent(candidaterev)
                # no need to try a delta against nullrev, this will be handled
                # by fulltext later.
                if candidaterev == nullrev:
                    continue
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo