revlog: drop RevlogError alias (API)...
Gregory Szorc
r39809:2cd93a8d default
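The change is mechanical: the module-level RevlogError alias is deleted from mercurial/revlog.py, and every in-module use now spells error.RevlogError. For third-party code that caught the alias, the migration looks roughly like the following sketch (hypothetical caller code, not part of this commit; the opener and the index path are assumptions):

    from mercurial import error, revlog

    try:
        rl = revlog.revlog(opener, 'data/narf.i')   # opener assumed given
    except error.RevlogError:   # previously: revlog.RevlogError
        pass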
@@ -1,2497 +1,2498 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import os
20 import os
21 import re
21 import re
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullhex,
29 nullhex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .revlogutils.constants import (
38 from .revlogutils.constants import (
39 FLAG_GENERALDELTA,
39 FLAG_GENERALDELTA,
40 FLAG_INLINE_DATA,
40 FLAG_INLINE_DATA,
41 REVIDX_DEFAULT_FLAGS,
41 REVIDX_DEFAULT_FLAGS,
42 REVIDX_ELLIPSIS,
42 REVIDX_ELLIPSIS,
43 REVIDX_EXTSTORED,
43 REVIDX_EXTSTORED,
44 REVIDX_FLAGS_ORDER,
44 REVIDX_FLAGS_ORDER,
45 REVIDX_ISCENSORED,
45 REVIDX_ISCENSORED,
46 REVIDX_KNOWN_FLAGS,
46 REVIDX_KNOWN_FLAGS,
47 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVIDX_RAWTEXT_CHANGING_FLAGS,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 )
56 )
57 from .thirdparty import (
57 from .thirdparty import (
58 attr,
58 attr,
59 )
59 )
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 stringutil,
75 stringutil,
76 )
76 )
77
77
78 # blanked usage of all the name to prevent pyflakes constraints
78 # blanked usage of all the name to prevent pyflakes constraints
79 # We need these name available in the module for extensions.
79 # We need these name available in the module for extensions.
80 REVLOGV0
80 REVLOGV0
81 REVLOGV1
81 REVLOGV1
82 REVLOGV2
82 REVLOGV2
83 FLAG_INLINE_DATA
83 FLAG_INLINE_DATA
84 FLAG_GENERALDELTA
84 FLAG_GENERALDELTA
85 REVLOG_DEFAULT_FLAGS
85 REVLOG_DEFAULT_FLAGS
86 REVLOG_DEFAULT_FORMAT
86 REVLOG_DEFAULT_FORMAT
87 REVLOG_DEFAULT_VERSION
87 REVLOG_DEFAULT_VERSION
88 REVLOGV1_FLAGS
88 REVLOGV1_FLAGS
89 REVLOGV2_FLAGS
89 REVLOGV2_FLAGS
90 REVIDX_ISCENSORED
90 REVIDX_ISCENSORED
91 REVIDX_ELLIPSIS
91 REVIDX_ELLIPSIS
92 REVIDX_EXTSTORED
92 REVIDX_EXTSTORED
93 REVIDX_DEFAULT_FLAGS
93 REVIDX_DEFAULT_FLAGS
94 REVIDX_FLAGS_ORDER
94 REVIDX_FLAGS_ORDER
95 REVIDX_KNOWN_FLAGS
95 REVIDX_KNOWN_FLAGS
96 REVIDX_RAWTEXT_CHANGING_FLAGS
96 REVIDX_RAWTEXT_CHANGING_FLAGS
97
97
98 parsers = policy.importmod(r'parsers')
98 parsers = policy.importmod(r'parsers')
99
99
100 # Aliased for performance.
100 # Aliased for performance.
101 _zlibdecompress = zlib.decompress
101 _zlibdecompress = zlib.decompress
102
102
103 # max size of revlog with inline data
103 # max size of revlog with inline data
104 _maxinline = 131072
104 _maxinline = 131072
105 _chunksize = 1048576
105 _chunksize = 1048576
106
106
107 RevlogError = error.RevlogError
108 LookupError = error.LookupError
107 LookupError = error.LookupError
109 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
108 AmbiguousPrefixLookupError = error.AmbiguousPrefixLookupError
110 CensoredNodeError = error.CensoredNodeError
109 CensoredNodeError = error.CensoredNodeError
111 ProgrammingError = error.ProgrammingError
110 ProgrammingError = error.ProgrammingError
112
111
113 # Store flag processors (cf. 'addflagprocessor()' to register)
112 # Store flag processors (cf. 'addflagprocessor()' to register)
114 _flagprocessors = {
113 _flagprocessors = {
115 REVIDX_ISCENSORED: None,
114 REVIDX_ISCENSORED: None,
116 }
115 }
117
116
118 # Flag processors for REVIDX_ELLIPSIS.
117 # Flag processors for REVIDX_ELLIPSIS.
119 def ellipsisreadprocessor(rl, text):
118 def ellipsisreadprocessor(rl, text):
120 return text, False
119 return text, False
121
120
122 def ellipsiswriteprocessor(rl, text):
121 def ellipsiswriteprocessor(rl, text):
123 return text, False
122 return text, False
124
123
125 def ellipsisrawprocessor(rl, text):
124 def ellipsisrawprocessor(rl, text):
126 return False
125 return False
127
126
128 ellipsisprocessor = (
127 ellipsisprocessor = (
129 ellipsisreadprocessor,
128 ellipsisreadprocessor,
130 ellipsiswriteprocessor,
129 ellipsiswriteprocessor,
131 ellipsisrawprocessor,
130 ellipsisrawprocessor,
132 )
131 )
133
132
134 _mdre = re.compile('\1\n')
133 _mdre = re.compile('\1\n')
135 def parsemeta(text):
134 def parsemeta(text):
136 """return (metadatadict, metadatasize)"""
135 """return (metadatadict, metadatasize)"""
137 # text can be buffer, so we can't use .startswith or .index
136 # text can be buffer, so we can't use .startswith or .index
138 if text[:2] != '\1\n':
137 if text[:2] != '\1\n':
139 return None, None
138 return None, None
140 s = _mdre.search(text, 2).start()
139 s = _mdre.search(text, 2).start()
141 mtext = text[2:s]
140 mtext = text[2:s]
142 meta = {}
141 meta = {}
143 for l in mtext.splitlines():
142 for l in mtext.splitlines():
144 k, v = l.split(": ", 1)
143 k, v = l.split(": ", 1)
145 meta[k] = v
144 meta[k] = v
146 return meta, (s + 2)
145 return meta, (s + 2)
147
146
148 def packmeta(meta, text):
147 def packmeta(meta, text):
149 keys = sorted(meta)
148 keys = sorted(meta)
150 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
149 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
151 return "\1\n%s\1\n%s" % (metatext, text)
150 return "\1\n%s\1\n%s" % (metatext, text)
152
151
153 def _censoredtext(text):
152 def _censoredtext(text):
154 m, offs = parsemeta(text)
153 m, offs = parsemeta(text)
155 return m and "censored" in m
154 return m and "censored" in m
156
155
157 def addflagprocessor(flag, processor):
156 def addflagprocessor(flag, processor):
158 """Register a flag processor on a revision data flag.
157 """Register a flag processor on a revision data flag.
159
158
160 Invariant:
159 Invariant:
161 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
160 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
162 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
161 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
163 - Only one flag processor can be registered on a specific flag.
162 - Only one flag processor can be registered on a specific flag.
164 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
163 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
165 following signatures:
164 following signatures:
166 - (read) f(self, rawtext) -> text, bool
165 - (read) f(self, rawtext) -> text, bool
167 - (write) f(self, text) -> rawtext, bool
166 - (write) f(self, text) -> rawtext, bool
168 - (raw) f(self, rawtext) -> bool
167 - (raw) f(self, rawtext) -> bool
169 "text" is presented to the user. "rawtext" is stored in revlog data, not
168 "text" is presented to the user. "rawtext" is stored in revlog data, not
170 directly visible to the user.
169 directly visible to the user.
171 The boolean returned by these transforms is used to determine whether
170 The boolean returned by these transforms is used to determine whether
172 the returned text can be used for hash integrity checking. For example,
171 the returned text can be used for hash integrity checking. For example,
173 if "write" returns False, then "text" is used to generate hash. If
172 if "write" returns False, then "text" is used to generate hash. If
174 "write" returns True, that basically means "rawtext" returned by "write"
173 "write" returns True, that basically means "rawtext" returned by "write"
175 should be used to generate hash. Usually, "write" and "read" return
174 should be used to generate hash. Usually, "write" and "read" return
176 different booleans. And "raw" returns a same boolean as "write".
175 different booleans. And "raw" returns a same boolean as "write".
177
176
178 Note: The 'raw' transform is used for changegroup generation and in some
177 Note: The 'raw' transform is used for changegroup generation and in some
179 debug commands. In this case the transform only indicates whether the
178 debug commands. In this case the transform only indicates whether the
180 contents can be used for hash integrity checks.
179 contents can be used for hash integrity checks.
181 """
180 """
182 if not flag & REVIDX_KNOWN_FLAGS:
181 if not flag & REVIDX_KNOWN_FLAGS:
183 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
182 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
184 raise ProgrammingError(msg)
183 raise ProgrammingError(msg)
185 if flag not in REVIDX_FLAGS_ORDER:
184 if flag not in REVIDX_FLAGS_ORDER:
186 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
185 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
187 raise ProgrammingError(msg)
186 raise ProgrammingError(msg)
188 if flag in _flagprocessors:
187 if flag in _flagprocessors:
189 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
188 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
190 raise error.Abort(msg)
189 raise error.Abort(msg)
191 _flagprocessors[flag] = processor
190 _flagprocessors[flag] = processor
192
191
193 def getoffset(q):
192 def getoffset(q):
194 return int(q >> 16)
193 return int(q >> 16)
195
194
196 def gettype(q):
195 def gettype(q):
197 return int(q & 0xFFFF)
196 return int(q & 0xFFFF)
198
197
199 def offset_type(offset, type):
198 def offset_type(offset, type):
200 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
199 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
201 raise ValueError('unknown revlog index flags')
200 raise ValueError('unknown revlog index flags')
202 return int(int(offset) << 16 | type)
201 return int(int(offset) << 16 | type)
203
202
204 _nullhash = hashlib.sha1(nullid)
203 _nullhash = hashlib.sha1(nullid)
205
204
206 def hash(text, p1, p2):
205 def hash(text, p1, p2):
207 """generate a hash from the given text and its parent hashes
206 """generate a hash from the given text and its parent hashes
208
207
209 This hash combines both the current file contents and its history
208 This hash combines both the current file contents and its history
210 in a manner that makes it easy to distinguish nodes with the same
209 in a manner that makes it easy to distinguish nodes with the same
211 content in the revision graph.
210 content in the revision graph.
212 """
211 """
213 # As of now, if one of the parent node is null, p2 is null
212 # As of now, if one of the parent node is null, p2 is null
214 if p2 == nullid:
213 if p2 == nullid:
215 # deep copy of a hash is faster than creating one
214 # deep copy of a hash is faster than creating one
216 s = _nullhash.copy()
215 s = _nullhash.copy()
217 s.update(p1)
216 s.update(p1)
218 else:
217 else:
219 # none of the parent nodes are nullid
218 # none of the parent nodes are nullid
220 if p1 < p2:
219 if p1 < p2:
221 a = p1
220 a = p1
222 b = p2
221 b = p2
223 else:
222 else:
224 a = p2
223 a = p2
225 b = p1
224 b = p1
226 s = hashlib.sha1(a)
225 s = hashlib.sha1(a)
227 s.update(b)
226 s.update(b)
228 s.update(text)
227 s.update(text)
229 return s.digest()
228 return s.digest()
230
229
231 @attr.s(slots=True, frozen=True)
230 @attr.s(slots=True, frozen=True)
232 class _revisioninfo(object):
231 class _revisioninfo(object):
233 """Information about a revision that allows building its fulltext
232 """Information about a revision that allows building its fulltext
234 node: expected hash of the revision
233 node: expected hash of the revision
235 p1, p2: parent revs of the revision
234 p1, p2: parent revs of the revision
236 btext: built text cache consisting of a one-element list
235 btext: built text cache consisting of a one-element list
237 cachedelta: (baserev, uncompressed_delta) or None
236 cachedelta: (baserev, uncompressed_delta) or None
238 flags: flags associated to the revision storage
237 flags: flags associated to the revision storage
239
238
240 One of btext[0] or cachedelta must be set.
239 One of btext[0] or cachedelta must be set.
241 """
240 """
242 node = attr.ib()
241 node = attr.ib()
243 p1 = attr.ib()
242 p1 = attr.ib()
244 p2 = attr.ib()
243 p2 = attr.ib()
245 btext = attr.ib()
244 btext = attr.ib()
246 textlen = attr.ib()
245 textlen = attr.ib()
247 cachedelta = attr.ib()
246 cachedelta = attr.ib()
248 flags = attr.ib()
247 flags = attr.ib()
249
248
250 @interfaceutil.implementer(repository.irevisiondelta)
249 @interfaceutil.implementer(repository.irevisiondelta)
251 @attr.s(slots=True, frozen=True)
250 @attr.s(slots=True, frozen=True)
252 class revlogrevisiondelta(object):
251 class revlogrevisiondelta(object):
253 node = attr.ib()
252 node = attr.ib()
254 p1node = attr.ib()
253 p1node = attr.ib()
255 p2node = attr.ib()
254 p2node = attr.ib()
256 basenode = attr.ib()
255 basenode = attr.ib()
257 linknode = attr.ib()
256 linknode = attr.ib()
258 flags = attr.ib()
257 flags = attr.ib()
259 baserevisionsize = attr.ib()
258 baserevisionsize = attr.ib()
260 revision = attr.ib()
259 revision = attr.ib()
261 delta = attr.ib()
260 delta = attr.ib()
262
261
263 # index v0:
262 # index v0:
264 # 4 bytes: offset
263 # 4 bytes: offset
265 # 4 bytes: compressed length
264 # 4 bytes: compressed length
266 # 4 bytes: base rev
265 # 4 bytes: base rev
267 # 4 bytes: link rev
266 # 4 bytes: link rev
268 # 20 bytes: parent 1 nodeid
267 # 20 bytes: parent 1 nodeid
269 # 20 bytes: parent 2 nodeid
268 # 20 bytes: parent 2 nodeid
270 # 20 bytes: nodeid
269 # 20 bytes: nodeid
271 indexformatv0 = struct.Struct(">4l20s20s20s")
270 indexformatv0 = struct.Struct(">4l20s20s20s")
272 indexformatv0_pack = indexformatv0.pack
271 indexformatv0_pack = indexformatv0.pack
273 indexformatv0_unpack = indexformatv0.unpack
272 indexformatv0_unpack = indexformatv0.unpack
274
273
275 class revlogoldindex(list):
274 class revlogoldindex(list):
276 def __getitem__(self, i):
275 def __getitem__(self, i):
277 if i == -1:
276 if i == -1:
278 return (0, 0, 0, -1, -1, -1, -1, nullid)
277 return (0, 0, 0, -1, -1, -1, -1, nullid)
279 return list.__getitem__(self, i)
278 return list.__getitem__(self, i)
280
279
281 class revlogoldio(object):
280 class revlogoldio(object):
282 def __init__(self):
281 def __init__(self):
283 self.size = indexformatv0.size
282 self.size = indexformatv0.size
284
283
285 def parseindex(self, data, inline):
284 def parseindex(self, data, inline):
286 s = self.size
285 s = self.size
287 index = []
286 index = []
288 nodemap = {nullid: nullrev}
287 nodemap = {nullid: nullrev}
289 n = off = 0
288 n = off = 0
290 l = len(data)
289 l = len(data)
291 while off + s <= l:
290 while off + s <= l:
292 cur = data[off:off + s]
291 cur = data[off:off + s]
293 off += s
292 off += s
294 e = indexformatv0_unpack(cur)
293 e = indexformatv0_unpack(cur)
295 # transform to revlogv1 format
294 # transform to revlogv1 format
296 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
295 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
297 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
296 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
298 index.append(e2)
297 index.append(e2)
299 nodemap[e[6]] = n
298 nodemap[e[6]] = n
300 n += 1
299 n += 1
301
300
302 return revlogoldindex(index), nodemap, None
301 return revlogoldindex(index), nodemap, None
303
302
304 def packentry(self, entry, node, version, rev):
303 def packentry(self, entry, node, version, rev):
305 if gettype(entry[0]):
304 if gettype(entry[0]):
306 raise RevlogError(_('index entry flags need revlog version 1'))
305 raise error.RevlogError(_('index entry flags need revlog '
306 'version 1'))
307 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
307 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
308 node(entry[5]), node(entry[6]), entry[7])
308 node(entry[5]), node(entry[6]), entry[7])
309 return indexformatv0_pack(*e2)
309 return indexformatv0_pack(*e2)
310
310
311 # index ng:
311 # index ng:
312 # 6 bytes: offset
312 # 6 bytes: offset
313 # 2 bytes: flags
313 # 2 bytes: flags
314 # 4 bytes: compressed length
314 # 4 bytes: compressed length
315 # 4 bytes: uncompressed length
315 # 4 bytes: uncompressed length
316 # 4 bytes: base rev
316 # 4 bytes: base rev
317 # 4 bytes: link rev
317 # 4 bytes: link rev
318 # 4 bytes: parent 1 rev
318 # 4 bytes: parent 1 rev
319 # 4 bytes: parent 2 rev
319 # 4 bytes: parent 2 rev
320 # 32 bytes: nodeid
320 # 32 bytes: nodeid
321 indexformatng = struct.Struct(">Qiiiiii20s12x")
321 indexformatng = struct.Struct(">Qiiiiii20s12x")
322 indexformatng_pack = indexformatng.pack
322 indexformatng_pack = indexformatng.pack
323 versionformat = struct.Struct(">I")
323 versionformat = struct.Struct(">I")
324 versionformat_pack = versionformat.pack
324 versionformat_pack = versionformat.pack
325 versionformat_unpack = versionformat.unpack
325 versionformat_unpack = versionformat.unpack
326
326
327 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
327 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
328 # signed integer)
328 # signed integer)
329 _maxentrysize = 0x7fffffff
329 _maxentrysize = 0x7fffffff
330
330
331 class revlogio(object):
331 class revlogio(object):
332 def __init__(self):
332 def __init__(self):
333 self.size = indexformatng.size
333 self.size = indexformatng.size
334
334
335 def parseindex(self, data, inline):
335 def parseindex(self, data, inline):
336 # call the C implementation to parse the index data
336 # call the C implementation to parse the index data
337 index, cache = parsers.parse_index2(data, inline)
337 index, cache = parsers.parse_index2(data, inline)
338 return index, getattr(index, 'nodemap', None), cache
338 return index, getattr(index, 'nodemap', None), cache
339
339
340 def packentry(self, entry, node, version, rev):
340 def packentry(self, entry, node, version, rev):
341 p = indexformatng_pack(*entry)
341 p = indexformatng_pack(*entry)
342 if rev == 0:
342 if rev == 0:
343 p = versionformat_pack(version) + p[4:]
343 p = versionformat_pack(version) + p[4:]
344 return p
344 return p
345
345
346 class revlog(object):
346 class revlog(object):
347 """
347 """
348 the underlying revision storage object
348 the underlying revision storage object
349
349
350 A revlog consists of two parts, an index and the revision data.
350 A revlog consists of two parts, an index and the revision data.
351
351
352 The index is a file with a fixed record size containing
352 The index is a file with a fixed record size containing
353 information on each revision, including its nodeid (hash), the
353 information on each revision, including its nodeid (hash), the
354 nodeids of its parents, the position and offset of its data within
354 nodeids of its parents, the position and offset of its data within
355 the data file, and the revision it's based on. Finally, each entry
355 the data file, and the revision it's based on. Finally, each entry
356 contains a linkrev entry that can serve as a pointer to external
356 contains a linkrev entry that can serve as a pointer to external
357 data.
357 data.
358
358
359 The revision data itself is a linear collection of data chunks.
359 The revision data itself is a linear collection of data chunks.
360 Each chunk represents a revision and is usually represented as a
360 Each chunk represents a revision and is usually represented as a
361 delta against the previous chunk. To bound lookup time, runs of
361 delta against the previous chunk. To bound lookup time, runs of
362 deltas are limited to about 2 times the length of the original
362 deltas are limited to about 2 times the length of the original
363 version data. This makes retrieval of a version proportional to
363 version data. This makes retrieval of a version proportional to
364 its size, or O(1) relative to the number of revisions.
364 its size, or O(1) relative to the number of revisions.
365
365
366 Both pieces of the revlog are written to in an append-only
366 Both pieces of the revlog are written to in an append-only
367 fashion, which means we never need to rewrite a file to insert or
367 fashion, which means we never need to rewrite a file to insert or
368 remove data, and can use some simple techniques to avoid the need
368 remove data, and can use some simple techniques to avoid the need
369 for locking while reading.
369 for locking while reading.
370
370
371 If checkambig, indexfile is opened with checkambig=True at
371 If checkambig, indexfile is opened with checkambig=True at
372 writing, to avoid file stat ambiguity.
372 writing, to avoid file stat ambiguity.
373
373
374 If mmaplargeindex is True, and an mmapindexthreshold is set, the
374 If mmaplargeindex is True, and an mmapindexthreshold is set, the
375 index will be mmapped rather than read if it is larger than the
375 index will be mmapped rather than read if it is larger than the
376 configured threshold.
376 configured threshold.
377
377
378 If censorable is True, the revlog can have censored revisions.
378 If censorable is True, the revlog can have censored revisions.
379 """
379 """
380 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
380 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
381 mmaplargeindex=False, censorable=False):
381 mmaplargeindex=False, censorable=False):
382 """
382 """
383 create a revlog object
383 create a revlog object
384
384
385 opener is a function that abstracts the file opening operation
385 opener is a function that abstracts the file opening operation
386 and can be used to implement COW semantics or the like.
386 and can be used to implement COW semantics or the like.
387 """
387 """
388 self.indexfile = indexfile
388 self.indexfile = indexfile
389 self.datafile = datafile or (indexfile[:-2] + ".d")
389 self.datafile = datafile or (indexfile[:-2] + ".d")
390 self.opener = opener
390 self.opener = opener
391 # When True, indexfile is opened with checkambig=True at writing, to
391 # When True, indexfile is opened with checkambig=True at writing, to
392 # avoid file stat ambiguity.
392 # avoid file stat ambiguity.
393 self._checkambig = checkambig
393 self._checkambig = checkambig
394 self._censorable = censorable
394 self._censorable = censorable
395 # 3-tuple of (node, rev, text) for a raw revision.
395 # 3-tuple of (node, rev, text) for a raw revision.
396 self._cache = None
396 self._cache = None
397 # Maps rev to chain base rev.
397 # Maps rev to chain base rev.
398 self._chainbasecache = util.lrucachedict(100)
398 self._chainbasecache = util.lrucachedict(100)
399 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
399 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
400 self._chunkcache = (0, '')
400 self._chunkcache = (0, '')
401 # How much data to read and cache into the raw revlog data cache.
401 # How much data to read and cache into the raw revlog data cache.
402 self._chunkcachesize = 65536
402 self._chunkcachesize = 65536
403 self._maxchainlen = None
403 self._maxchainlen = None
404 self._deltabothparents = True
404 self._deltabothparents = True
405 self.index = []
405 self.index = []
406 # Mapping of partial identifiers to full nodes.
406 # Mapping of partial identifiers to full nodes.
407 self._pcache = {}
407 self._pcache = {}
408 # Mapping of revision integer to full node.
408 # Mapping of revision integer to full node.
409 self._nodecache = {nullid: nullrev}
409 self._nodecache = {nullid: nullrev}
410 self._nodepos = None
410 self._nodepos = None
411 self._compengine = 'zlib'
411 self._compengine = 'zlib'
412 self._maxdeltachainspan = -1
412 self._maxdeltachainspan = -1
413 self._withsparseread = False
413 self._withsparseread = False
414 self._sparserevlog = False
414 self._sparserevlog = False
415 self._srdensitythreshold = 0.50
415 self._srdensitythreshold = 0.50
416 self._srmingapsize = 262144
416 self._srmingapsize = 262144
417
417
418 # Make copy of flag processors so each revlog instance can support
418 # Make copy of flag processors so each revlog instance can support
419 # custom flags.
419 # custom flags.
420 self._flagprocessors = dict(_flagprocessors)
420 self._flagprocessors = dict(_flagprocessors)
421
421
422 mmapindexthreshold = None
422 mmapindexthreshold = None
423 v = REVLOG_DEFAULT_VERSION
423 v = REVLOG_DEFAULT_VERSION
424 opts = getattr(opener, 'options', None)
424 opts = getattr(opener, 'options', None)
425 if opts is not None:
425 if opts is not None:
426 if 'revlogv2' in opts:
426 if 'revlogv2' in opts:
427 # version 2 revlogs always use generaldelta.
427 # version 2 revlogs always use generaldelta.
428 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
428 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
429 elif 'revlogv1' in opts:
429 elif 'revlogv1' in opts:
430 if 'generaldelta' in opts:
430 if 'generaldelta' in opts:
431 v |= FLAG_GENERALDELTA
431 v |= FLAG_GENERALDELTA
432 else:
432 else:
433 v = 0
433 v = 0
434 if 'chunkcachesize' in opts:
434 if 'chunkcachesize' in opts:
435 self._chunkcachesize = opts['chunkcachesize']
435 self._chunkcachesize = opts['chunkcachesize']
436 if 'maxchainlen' in opts:
436 if 'maxchainlen' in opts:
437 self._maxchainlen = opts['maxchainlen']
437 self._maxchainlen = opts['maxchainlen']
438 if 'deltabothparents' in opts:
438 if 'deltabothparents' in opts:
439 self._deltabothparents = opts['deltabothparents']
439 self._deltabothparents = opts['deltabothparents']
440 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
440 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
441 if 'compengine' in opts:
441 if 'compengine' in opts:
442 self._compengine = opts['compengine']
442 self._compengine = opts['compengine']
443 if 'maxdeltachainspan' in opts:
443 if 'maxdeltachainspan' in opts:
444 self._maxdeltachainspan = opts['maxdeltachainspan']
444 self._maxdeltachainspan = opts['maxdeltachainspan']
445 if mmaplargeindex and 'mmapindexthreshold' in opts:
445 if mmaplargeindex and 'mmapindexthreshold' in opts:
446 mmapindexthreshold = opts['mmapindexthreshold']
446 mmapindexthreshold = opts['mmapindexthreshold']
447 self._sparserevlog = bool(opts.get('sparse-revlog', False))
447 self._sparserevlog = bool(opts.get('sparse-revlog', False))
448 withsparseread = bool(opts.get('with-sparse-read', False))
448 withsparseread = bool(opts.get('with-sparse-read', False))
449 # sparse-revlog forces sparse-read
449 # sparse-revlog forces sparse-read
450 self._withsparseread = self._sparserevlog or withsparseread
450 self._withsparseread = self._sparserevlog or withsparseread
451 if 'sparse-read-density-threshold' in opts:
451 if 'sparse-read-density-threshold' in opts:
452 self._srdensitythreshold = opts['sparse-read-density-threshold']
452 self._srdensitythreshold = opts['sparse-read-density-threshold']
453 if 'sparse-read-min-gap-size' in opts:
453 if 'sparse-read-min-gap-size' in opts:
454 self._srmingapsize = opts['sparse-read-min-gap-size']
454 self._srmingapsize = opts['sparse-read-min-gap-size']
455 if opts.get('enableellipsis'):
455 if opts.get('enableellipsis'):
456 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
456 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
457
457
458 if self._chunkcachesize <= 0:
458 if self._chunkcachesize <= 0:
459 raise RevlogError(_('revlog chunk cache size %r is not greater '
459 raise error.RevlogError(_('revlog chunk cache size %r is not '
460 'than 0') % self._chunkcachesize)
460 'greater than 0') % self._chunkcachesize)
461 elif self._chunkcachesize & (self._chunkcachesize - 1):
461 elif self._chunkcachesize & (self._chunkcachesize - 1):
462 raise RevlogError(_('revlog chunk cache size %r is not a power '
462 raise error.RevlogError(_('revlog chunk cache size %r is not a '
463 'of 2') % self._chunkcachesize)
463 'power of 2') % self._chunkcachesize)
464
464
465 indexdata = ''
465 indexdata = ''
466 self._initempty = True
466 self._initempty = True
467 try:
467 try:
468 with self._indexfp() as f:
468 with self._indexfp() as f:
469 if (mmapindexthreshold is not None and
469 if (mmapindexthreshold is not None and
470 self.opener.fstat(f).st_size >= mmapindexthreshold):
470 self.opener.fstat(f).st_size >= mmapindexthreshold):
471 indexdata = util.buffer(util.mmapread(f))
471 indexdata = util.buffer(util.mmapread(f))
472 else:
472 else:
473 indexdata = f.read()
473 indexdata = f.read()
474 if len(indexdata) > 0:
474 if len(indexdata) > 0:
475 v = versionformat_unpack(indexdata[:4])[0]
475 v = versionformat_unpack(indexdata[:4])[0]
476 self._initempty = False
476 self._initempty = False
477 except IOError as inst:
477 except IOError as inst:
478 if inst.errno != errno.ENOENT:
478 if inst.errno != errno.ENOENT:
479 raise
479 raise
480
480
481 self.version = v
481 self.version = v
482 self._inline = v & FLAG_INLINE_DATA
482 self._inline = v & FLAG_INLINE_DATA
483 self._generaldelta = v & FLAG_GENERALDELTA
483 self._generaldelta = v & FLAG_GENERALDELTA
484 flags = v & ~0xFFFF
484 flags = v & ~0xFFFF
485 fmt = v & 0xFFFF
485 fmt = v & 0xFFFF
486 if fmt == REVLOGV0:
486 if fmt == REVLOGV0:
487 if flags:
487 if flags:
488 raise RevlogError(_('unknown flags (%#04x) in version %d '
488 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
489 'revlog %s') %
489 'revlog %s') %
490 (flags >> 16, fmt, self.indexfile))
490 (flags >> 16, fmt, self.indexfile))
491 elif fmt == REVLOGV1:
491 elif fmt == REVLOGV1:
492 if flags & ~REVLOGV1_FLAGS:
492 if flags & ~REVLOGV1_FLAGS:
493 raise RevlogError(_('unknown flags (%#04x) in version %d '
493 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
494 'revlog %s') %
494 'revlog %s') %
495 (flags >> 16, fmt, self.indexfile))
495 (flags >> 16, fmt, self.indexfile))
496 elif fmt == REVLOGV2:
496 elif fmt == REVLOGV2:
497 if flags & ~REVLOGV2_FLAGS:
497 if flags & ~REVLOGV2_FLAGS:
498 raise RevlogError(_('unknown flags (%#04x) in version %d '
498 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
499 'revlog %s') %
499 'revlog %s') %
500 (flags >> 16, fmt, self.indexfile))
500 (flags >> 16, fmt, self.indexfile))
501 else:
501 else:
502 raise RevlogError(_('unknown version (%d) in revlog %s') %
502 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
503 (fmt, self.indexfile))
503 (fmt, self.indexfile))
504
504
505 self._storedeltachains = True
505 self._storedeltachains = True
506
506
507 self._io = revlogio()
507 self._io = revlogio()
508 if self.version == REVLOGV0:
508 if self.version == REVLOGV0:
509 self._io = revlogoldio()
509 self._io = revlogoldio()
510 try:
510 try:
511 d = self._io.parseindex(indexdata, self._inline)
511 d = self._io.parseindex(indexdata, self._inline)
512 except (ValueError, IndexError):
512 except (ValueError, IndexError):
513 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
513 raise error.RevlogError(_("index %s is corrupted") %
514 self.indexfile)
514 self.index, nodemap, self._chunkcache = d
515 self.index, nodemap, self._chunkcache = d
515 if nodemap is not None:
516 if nodemap is not None:
516 self.nodemap = self._nodecache = nodemap
517 self.nodemap = self._nodecache = nodemap
517 if not self._chunkcache:
518 if not self._chunkcache:
518 self._chunkclear()
519 self._chunkclear()
519 # revnum -> (chain-length, sum-delta-length)
520 # revnum -> (chain-length, sum-delta-length)
520 self._chaininfocache = {}
521 self._chaininfocache = {}
521 # revlog header -> revlog compressor
522 # revlog header -> revlog compressor
522 self._decompressors = {}
523 self._decompressors = {}
523
524
524 @util.propertycache
525 @util.propertycache
525 def _compressor(self):
526 def _compressor(self):
526 return util.compengines[self._compengine].revlogcompressor()
527 return util.compengines[self._compengine].revlogcompressor()
527
528
528 def _indexfp(self, mode='r'):
529 def _indexfp(self, mode='r'):
529 """file object for the revlog's index file"""
530 """file object for the revlog's index file"""
530 args = {r'mode': mode}
531 args = {r'mode': mode}
531 if mode != 'r':
532 if mode != 'r':
532 args[r'checkambig'] = self._checkambig
533 args[r'checkambig'] = self._checkambig
533 if mode == 'w':
534 if mode == 'w':
534 args[r'atomictemp'] = True
535 args[r'atomictemp'] = True
535 return self.opener(self.indexfile, **args)
536 return self.opener(self.indexfile, **args)
536
537
537 def _datafp(self, mode='r'):
538 def _datafp(self, mode='r'):
538 """file object for the revlog's data file"""
539 """file object for the revlog's data file"""
539 return self.opener(self.datafile, mode=mode)
540 return self.opener(self.datafile, mode=mode)
540
541
541 @contextlib.contextmanager
542 @contextlib.contextmanager
542 def _datareadfp(self, existingfp=None):
543 def _datareadfp(self, existingfp=None):
543 """file object suitable to read data"""
544 """file object suitable to read data"""
544 if existingfp is not None:
545 if existingfp is not None:
545 yield existingfp
546 yield existingfp
546 else:
547 else:
547 if self._inline:
548 if self._inline:
548 func = self._indexfp
549 func = self._indexfp
549 else:
550 else:
550 func = self._datafp
551 func = self._datafp
551 with func() as fp:
552 with func() as fp:
552 yield fp
553 yield fp
553
554
554 def tip(self):
555 def tip(self):
555 return self.node(len(self.index) - 1)
556 return self.node(len(self.index) - 1)
556 def __contains__(self, rev):
557 def __contains__(self, rev):
557 return 0 <= rev < len(self)
558 return 0 <= rev < len(self)
558 def __len__(self):
559 def __len__(self):
559 return len(self.index)
560 return len(self.index)
560 def __iter__(self):
561 def __iter__(self):
561 return iter(pycompat.xrange(len(self)))
562 return iter(pycompat.xrange(len(self)))
562 def revs(self, start=0, stop=None):
563 def revs(self, start=0, stop=None):
563 """iterate over all rev in this revlog (from start to stop)"""
564 """iterate over all rev in this revlog (from start to stop)"""
564 step = 1
565 step = 1
565 length = len(self)
566 length = len(self)
566 if stop is not None:
567 if stop is not None:
567 if start > stop:
568 if start > stop:
568 step = -1
569 step = -1
569 stop += step
570 stop += step
570 if stop > length:
571 if stop > length:
571 stop = length
572 stop = length
572 else:
573 else:
573 stop = length
574 stop = length
574 return pycompat.xrange(start, stop, step)
575 return pycompat.xrange(start, stop, step)
575
576
576 @util.propertycache
577 @util.propertycache
577 def nodemap(self):
578 def nodemap(self):
578 if self.index:
579 if self.index:
579 # populate mapping down to the initial node
580 # populate mapping down to the initial node
580 node0 = self.index[0][7] # get around changelog filtering
581 node0 = self.index[0][7] # get around changelog filtering
581 self.rev(node0)
582 self.rev(node0)
582 return self._nodecache
583 return self._nodecache
583
584
584 def hasnode(self, node):
585 def hasnode(self, node):
585 try:
586 try:
586 self.rev(node)
587 self.rev(node)
587 return True
588 return True
588 except KeyError:
589 except KeyError:
589 return False
590 return False
590
591
591 def candelta(self, baserev, rev):
592 def candelta(self, baserev, rev):
592 """whether two revisions (baserev, rev) can be delta-ed or not"""
593 """whether two revisions (baserev, rev) can be delta-ed or not"""
593 # Disable delta if either rev requires a content-changing flag
594 # Disable delta if either rev requires a content-changing flag
594 # processor (ex. LFS). This is because such flag processor can alter
595 # processor (ex. LFS). This is because such flag processor can alter
595 # the rawtext content that the delta will be based on, and two clients
596 # the rawtext content that the delta will be based on, and two clients
596 # could have a same revlog node with different flags (i.e. different
597 # could have a same revlog node with different flags (i.e. different
597 # rawtext contents) and the delta could be incompatible.
598 # rawtext contents) and the delta could be incompatible.
598 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
599 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
599 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
600 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
600 return False
601 return False
601 return True
602 return True
602
603
603 def clearcaches(self):
604 def clearcaches(self):
604 self._cache = None
605 self._cache = None
605 self._chainbasecache.clear()
606 self._chainbasecache.clear()
606 self._chunkcache = (0, '')
607 self._chunkcache = (0, '')
607 self._pcache = {}
608 self._pcache = {}
608
609
609 try:
610 try:
610 self._nodecache.clearcaches()
611 self._nodecache.clearcaches()
611 except AttributeError:
612 except AttributeError:
612 self._nodecache = {nullid: nullrev}
613 self._nodecache = {nullid: nullrev}
613 self._nodepos = None
614 self._nodepos = None
614
615
615 def rev(self, node):
616 def rev(self, node):
616 try:
617 try:
617 return self._nodecache[node]
618 return self._nodecache[node]
618 except TypeError:
619 except TypeError:
619 raise
620 raise
620 except RevlogError:
621 except error.RevlogError:
621 # parsers.c radix tree lookup failed
622 # parsers.c radix tree lookup failed
622 if node == wdirid or node in wdirfilenodeids:
623 if node == wdirid or node in wdirfilenodeids:
623 raise error.WdirUnsupported
624 raise error.WdirUnsupported
624 raise LookupError(node, self.indexfile, _('no node'))
625 raise LookupError(node, self.indexfile, _('no node'))
625 except KeyError:
626 except KeyError:
626 # pure python cache lookup failed
627 # pure python cache lookup failed
627 n = self._nodecache
628 n = self._nodecache
628 i = self.index
629 i = self.index
629 p = self._nodepos
630 p = self._nodepos
630 if p is None:
631 if p is None:
631 p = len(i) - 1
632 p = len(i) - 1
632 else:
633 else:
633 assert p < len(i)
634 assert p < len(i)
634 for r in pycompat.xrange(p, -1, -1):
635 for r in pycompat.xrange(p, -1, -1):
635 v = i[r][7]
636 v = i[r][7]
636 n[v] = r
637 n[v] = r
637 if v == node:
638 if v == node:
638 self._nodepos = r - 1
639 self._nodepos = r - 1
639 return r
640 return r
640 if node == wdirid or node in wdirfilenodeids:
641 if node == wdirid or node in wdirfilenodeids:
641 raise error.WdirUnsupported
642 raise error.WdirUnsupported
642 raise LookupError(node, self.indexfile, _('no node'))
643 raise LookupError(node, self.indexfile, _('no node'))
643
644
644 # Accessors for index entries.
645 # Accessors for index entries.
645
646
646 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
647 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
647 # are flags.
648 # are flags.
648 def start(self, rev):
649 def start(self, rev):
649 return int(self.index[rev][0] >> 16)
650 return int(self.index[rev][0] >> 16)
650
651
651 def flags(self, rev):
652 def flags(self, rev):
652 return self.index[rev][0] & 0xFFFF
653 return self.index[rev][0] & 0xFFFF
653
654
654 def length(self, rev):
655 def length(self, rev):
655 return self.index[rev][1]
656 return self.index[rev][1]
656
657
657 def rawsize(self, rev):
658 def rawsize(self, rev):
658 """return the length of the uncompressed text for a given revision"""
659 """return the length of the uncompressed text for a given revision"""
659 l = self.index[rev][2]
660 l = self.index[rev][2]
660 if l >= 0:
661 if l >= 0:
661 return l
662 return l
662
663
663 t = self.revision(rev, raw=True)
664 t = self.revision(rev, raw=True)
664 return len(t)
665 return len(t)
665
666
666 def size(self, rev):
667 def size(self, rev):
667 """length of non-raw text (processed by a "read" flag processor)"""
668 """length of non-raw text (processed by a "read" flag processor)"""
668 # fast path: if no "read" flag processor could change the content,
669 # fast path: if no "read" flag processor could change the content,
669 # size is rawsize. note: ELLIPSIS is known to not change the content.
670 # size is rawsize. note: ELLIPSIS is known to not change the content.
670 flags = self.flags(rev)
671 flags = self.flags(rev)
671 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
672 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
672 return self.rawsize(rev)
673 return self.rawsize(rev)
673
674
674 return len(self.revision(rev, raw=False))
675 return len(self.revision(rev, raw=False))
675
676
676 def chainbase(self, rev):
677 def chainbase(self, rev):
677 base = self._chainbasecache.get(rev)
678 base = self._chainbasecache.get(rev)
678 if base is not None:
679 if base is not None:
679 return base
680 return base
680
681
681 index = self.index
682 index = self.index
682 iterrev = rev
683 iterrev = rev
683 base = index[iterrev][3]
684 base = index[iterrev][3]
684 while base != iterrev:
685 while base != iterrev:
685 iterrev = base
686 iterrev = base
686 base = index[iterrev][3]
687 base = index[iterrev][3]
687
688
688 self._chainbasecache[rev] = base
689 self._chainbasecache[rev] = base
689 return base
690 return base
690
691
691 def linkrev(self, rev):
692 def linkrev(self, rev):
692 return self.index[rev][4]
693 return self.index[rev][4]
693
694
694 def parentrevs(self, rev):
695 def parentrevs(self, rev):
695 try:
696 try:
696 entry = self.index[rev]
697 entry = self.index[rev]
697 except IndexError:
698 except IndexError:
698 if rev == wdirrev:
699 if rev == wdirrev:
699 raise error.WdirUnsupported
700 raise error.WdirUnsupported
700 raise
701 raise
701
702
702 return entry[5], entry[6]
703 return entry[5], entry[6]
703
704
704 def node(self, rev):
705 def node(self, rev):
705 try:
706 try:
706 return self.index[rev][7]
707 return self.index[rev][7]
707 except IndexError:
708 except IndexError:
708 if rev == wdirrev:
709 if rev == wdirrev:
709 raise error.WdirUnsupported
710 raise error.WdirUnsupported
710 raise
711 raise
711
712
712 # Derived from index values.
713 # Derived from index values.
713
714
714 def end(self, rev):
715 def end(self, rev):
715 return self.start(rev) + self.length(rev)
716 return self.start(rev) + self.length(rev)
716
717
717 def parents(self, node):
718 def parents(self, node):
718 i = self.index
719 i = self.index
719 d = i[self.rev(node)]
720 d = i[self.rev(node)]
720 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
721 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
721
722
722 def chainlen(self, rev):
723 def chainlen(self, rev):
723 return self._chaininfo(rev)[0]
724 return self._chaininfo(rev)[0]
724
725
725 def _chaininfo(self, rev):
726 def _chaininfo(self, rev):
726 chaininfocache = self._chaininfocache
727 chaininfocache = self._chaininfocache
727 if rev in chaininfocache:
728 if rev in chaininfocache:
728 return chaininfocache[rev]
729 return chaininfocache[rev]
729 index = self.index
730 index = self.index
730 generaldelta = self._generaldelta
731 generaldelta = self._generaldelta
731 iterrev = rev
732 iterrev = rev
732 e = index[iterrev]
733 e = index[iterrev]
733 clen = 0
734 clen = 0
734 compresseddeltalen = 0
735 compresseddeltalen = 0
735 while iterrev != e[3]:
736 while iterrev != e[3]:
736 clen += 1
737 clen += 1
737 compresseddeltalen += e[1]
738 compresseddeltalen += e[1]
738 if generaldelta:
739 if generaldelta:
739 iterrev = e[3]
740 iterrev = e[3]
740 else:
741 else:
741 iterrev -= 1
742 iterrev -= 1
742 if iterrev in chaininfocache:
743 if iterrev in chaininfocache:
743 t = chaininfocache[iterrev]
744 t = chaininfocache[iterrev]
744 clen += t[0]
745 clen += t[0]
745 compresseddeltalen += t[1]
746 compresseddeltalen += t[1]
746 break
747 break
747 e = index[iterrev]
748 e = index[iterrev]
748 else:
749 else:
749 # Add text length of base since decompressing that also takes
750 # Add text length of base since decompressing that also takes
750 # work. For cache hits the length is already included.
751 # work. For cache hits the length is already included.
751 compresseddeltalen += e[1]
752 compresseddeltalen += e[1]
752 r = (clen, compresseddeltalen)
753 r = (clen, compresseddeltalen)
753 chaininfocache[rev] = r
754 chaininfocache[rev] = r
754 return r
755 return r
755
756
756 def _deltachain(self, rev, stoprev=None):
757 def _deltachain(self, rev, stoprev=None):
757 """Obtain the delta chain for a revision.
758 """Obtain the delta chain for a revision.
758
759
759 ``stoprev`` specifies a revision to stop at. If not specified, we
760 ``stoprev`` specifies a revision to stop at. If not specified, we
760 stop at the base of the chain.
761 stop at the base of the chain.
761
762
762 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
763 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
763 revs in ascending order and ``stopped`` is a bool indicating whether
764 revs in ascending order and ``stopped`` is a bool indicating whether
764 ``stoprev`` was hit.
765 ``stoprev`` was hit.
765 """
766 """
766 # Try C implementation.
767 # Try C implementation.
767 try:
768 try:
768 return self.index.deltachain(rev, stoprev, self._generaldelta)
769 return self.index.deltachain(rev, stoprev, self._generaldelta)
769 except AttributeError:
770 except AttributeError:
770 pass
771 pass
771
772
772 chain = []
773 chain = []
773
774
774 # Alias to prevent attribute lookup in tight loop.
775 # Alias to prevent attribute lookup in tight loop.
775 index = self.index
776 index = self.index
776 generaldelta = self._generaldelta
777 generaldelta = self._generaldelta
777
778
778 iterrev = rev
779 iterrev = rev
779 e = index[iterrev]
780 e = index[iterrev]
780 while iterrev != e[3] and iterrev != stoprev:
781 while iterrev != e[3] and iterrev != stoprev:
781 chain.append(iterrev)
782 chain.append(iterrev)
782 if generaldelta:
783 if generaldelta:
783 iterrev = e[3]
784 iterrev = e[3]
784 else:
785 else:
785 iterrev -= 1
786 iterrev -= 1
786 e = index[iterrev]
787 e = index[iterrev]
787
788
788 if iterrev == stoprev:
789 if iterrev == stoprev:
789 stopped = True
790 stopped = True
790 else:
791 else:
791 chain.append(iterrev)
792 chain.append(iterrev)
792 stopped = False
793 stopped = False
793
794
794 chain.reverse()
795 chain.reverse()
795 return chain, stopped
796 return chain, stopped
796
797
797 def ancestors(self, revs, stoprev=0, inclusive=False):
798 def ancestors(self, revs, stoprev=0, inclusive=False):
798 """Generate the ancestors of 'revs' in reverse topological order.
799 """Generate the ancestors of 'revs' in reverse topological order.
799 Does not generate revs lower than stoprev.
800 Does not generate revs lower than stoprev.
800
801
801 See the documentation for ancestor.lazyancestors for more details."""
802 See the documentation for ancestor.lazyancestors for more details."""
802
803
803 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
804 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
804 inclusive=inclusive)
805 inclusive=inclusive)
805
806
806 def descendants(self, revs):
807 def descendants(self, revs):
807 """Generate the descendants of 'revs' in revision order.
808 """Generate the descendants of 'revs' in revision order.
808
809
809 Yield a sequence of revision numbers starting with a child of
810 Yield a sequence of revision numbers starting with a child of
810 some rev in revs, i.e., each revision is *not* considered a
811 some rev in revs, i.e., each revision is *not* considered a
811 descendant of itself. Results are ordered by revision number (a
812 descendant of itself. Results are ordered by revision number (a
812 topological sort)."""
813 topological sort)."""
813 first = min(revs)
814 first = min(revs)
814 if first == nullrev:
815 if first == nullrev:
815 for i in self:
816 for i in self:
816 yield i
817 yield i
817 return
818 return
818
819
819 seen = set(revs)
820 seen = set(revs)
820 for i in self.revs(start=first + 1):
821 for i in self.revs(start=first + 1):
821 for x in self.parentrevs(i):
822 for x in self.parentrevs(i):
822 if x != nullrev and x in seen:
823 if x != nullrev and x in seen:
823 seen.add(i)
824 seen.add(i)
824 yield i
825 yield i
825 break
826 break
826
827
827 def findcommonmissing(self, common=None, heads=None):
828 def findcommonmissing(self, common=None, heads=None):
828 """Return a tuple of the ancestors of common and the ancestors of heads
829 """Return a tuple of the ancestors of common and the ancestors of heads
829 that are not ancestors of common. In revset terminology, we return the
830 that are not ancestors of common. In revset terminology, we return the
830 tuple:
831 tuple:
831
832
832 ::common, (::heads) - (::common)
833 ::common, (::heads) - (::common)
833
834
834 The list is sorted by revision number, meaning it is
835 The list is sorted by revision number, meaning it is
835 topologically sorted.
836 topologically sorted.
836
837
837 'heads' and 'common' are both lists of node IDs. If heads is
838 'heads' and 'common' are both lists of node IDs. If heads is
838 not supplied, uses all of the revlog's heads. If common is not
839 not supplied, uses all of the revlog's heads. If common is not
839 supplied, uses nullid."""
840 supplied, uses nullid."""
840 if common is None:
841 if common is None:
841 common = [nullid]
842 common = [nullid]
842 if heads is None:
843 if heads is None:
843 heads = self.heads()
844 heads = self.heads()
844
845
845 common = [self.rev(n) for n in common]
846 common = [self.rev(n) for n in common]
846 heads = [self.rev(n) for n in heads]
847 heads = [self.rev(n) for n in heads]
847
848
848 # we want the ancestors, but inclusive
849 # we want the ancestors, but inclusive
849 class lazyset(object):
850 class lazyset(object):
850 def __init__(self, lazyvalues):
851 def __init__(self, lazyvalues):
851 self.addedvalues = set()
852 self.addedvalues = set()
852 self.lazyvalues = lazyvalues
853 self.lazyvalues = lazyvalues
853
854
854 def __contains__(self, value):
855 def __contains__(self, value):
855 return value in self.addedvalues or value in self.lazyvalues
856 return value in self.addedvalues or value in self.lazyvalues
856
857
857 def __iter__(self):
858 def __iter__(self):
858 added = self.addedvalues
859 added = self.addedvalues
859 for r in added:
860 for r in added:
860 yield r
861 yield r
861 for r in self.lazyvalues:
862 for r in self.lazyvalues:
862 if not r in added:
863 if not r in added:
863 yield r
864 yield r
864
865
865 def add(self, value):
866 def add(self, value):
866 self.addedvalues.add(value)
867 self.addedvalues.add(value)
867
868
868 def update(self, values):
869 def update(self, values):
869 self.addedvalues.update(values)
870 self.addedvalues.update(values)
870
871
871 has = lazyset(self.ancestors(common))
872 has = lazyset(self.ancestors(common))
872 has.add(nullrev)
873 has.add(nullrev)
873 has.update(common)
874 has.update(common)
874
875
875 # take all ancestors from heads that aren't in has
876 # take all ancestors from heads that aren't in has
876 missing = set()
877 missing = set()
877 visit = collections.deque(r for r in heads if r not in has)
878 visit = collections.deque(r for r in heads if r not in has)
878 while visit:
879 while visit:
879 r = visit.popleft()
880 r = visit.popleft()
880 if r in missing:
881 if r in missing:
881 continue
882 continue
882 else:
883 else:
883 missing.add(r)
884 missing.add(r)
884 for p in self.parentrevs(r):
885 for p in self.parentrevs(r):
885 if p not in has:
886 if p not in has:
886 visit.append(p)
887 visit.append(p)
887 missing = list(missing)
888 missing = list(missing)
888 missing.sort()
889 missing.sort()
889 return has, [self.node(miss) for miss in missing]
890 return has, [self.node(miss) for miss in missing]
890
891
891 def incrementalmissingrevs(self, common=None):
892 def incrementalmissingrevs(self, common=None):
892 """Return an object that can be used to incrementally compute the
893 """Return an object that can be used to incrementally compute the
893 revision numbers of the ancestors of arbitrary sets that are not
894 revision numbers of the ancestors of arbitrary sets that are not
894 ancestors of common. This is an ancestor.incrementalmissingancestors
895 ancestors of common. This is an ancestor.incrementalmissingancestors
895 object.
896 object.
896
897
897 'common' is a list of revision numbers. If common is not supplied, uses
898 'common' is a list of revision numbers. If common is not supplied, uses
898 nullrev.
899 nullrev.
899 """
900 """
900 if common is None:
901 if common is None:
901 common = [nullrev]
902 common = [nullrev]
902
903
903 return ancestor.incrementalmissingancestors(self.parentrevs, common)
904 return ancestor.incrementalmissingancestors(self.parentrevs, common)
904
905
905 def findmissingrevs(self, common=None, heads=None):
906 def findmissingrevs(self, common=None, heads=None):
906 """Return the revision numbers of the ancestors of heads that
907 """Return the revision numbers of the ancestors of heads that
907 are not ancestors of common.
908 are not ancestors of common.
908
909
909 More specifically, return a list of revision numbers corresponding to
910 More specifically, return a list of revision numbers corresponding to
910 nodes N such that every N satisfies the following constraints:
911 nodes N such that every N satisfies the following constraints:
911
912
912 1. N is an ancestor of some node in 'heads'
913 1. N is an ancestor of some node in 'heads'
913 2. N is not an ancestor of any node in 'common'
914 2. N is not an ancestor of any node in 'common'
914
915
915 The list is sorted by revision number, meaning it is
916 The list is sorted by revision number, meaning it is
916 topologically sorted.
917 topologically sorted.
917
918
918 'heads' and 'common' are both lists of revision numbers. If heads is
919 'heads' and 'common' are both lists of revision numbers. If heads is
919 not supplied, uses all of the revlog's heads. If common is not
920 not supplied, uses all of the revlog's heads. If common is not
920 supplied, uses nullid."""
921 supplied, uses nullid."""
921 if common is None:
922 if common is None:
922 common = [nullrev]
923 common = [nullrev]
923 if heads is None:
924 if heads is None:
924 heads = self.headrevs()
925 heads = self.headrevs()
925
926
926 inc = self.incrementalmissingrevs(common=common)
927 inc = self.incrementalmissingrevs(common=common)
927 return inc.missingancestors(heads)
928 return inc.missingancestors(heads)
928
929
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

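    # Illustrative sketch, assuming a toy history (not from the original
    # file): with nodes a <- b <- c and a second head d whose parent is b,
    # nodesbetween(roots=[b], heads=[c, d]) would return
    # ([b, c, d], [b], [c, d]): every node on a path from a surviving root
    # to a reachable head, plus those roots and heads themselves.
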
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

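    # Sketch of the marking pass above, using an assumed parent table: with
    # parentrevs 0 -> (-1, -1), 1 -> (0, -1), 2 -> (0, -1), each rev first
    # sets ishead[r] = 1 and then clears the slots of its parents, so rev 0
    # is cleared by both children and [1, 2] remain. The extra slot in
    # ishead (index count, reached as ishead[-1]) harmlessly absorbs the
    # nullrev (-1) parents of root revisions.
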
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

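    # Hedged usage sketch (hypothetical revlog `rl` with one linear branch):
    #
    #   rl.heads()          # -> [tipnode], the only childless node
    #   rl.heads(start=n)   # -> [tipnode] as well, if n is an ancestor of tip
    #
    # Revisions listed in `stop` are treated as if they had no children, so
    # the walk never extends `reachable` through them.
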
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

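    # Worked illustration of the short-circuits above (assumed revs): because
    # a revision number is always greater than those of its ancestors,
    # isancestorrev(5, 3) returns False immediately from the a > b test,
    # while isancestorrev(3, 5) falls through to checking whether 3 is
    # itself a head of the common ancestors of 3 and 5.
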
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                 _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise AmbiguousPrefixLookupError(id, self.indexfile,
                                                     _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

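    # Hedged sketch of prefix resolution (hypothetical hashes): with exactly
    # two stored nodes whose hex forms start 'ab12...' and 'ab98...',
    #
    #   rl._partialmatch('ab')     # -> AmbiguousPrefixLookupError
    #   rl._partialmatch('ab1')    # -> the full binary node for 'ab12...'
    #   rl._partialmatch('ffff')   # -> error.WdirUnsupported (wdir prefix)
    #
    # The C radix tree in self.index answers first when available; the pure
    # Python scan above is the fallback and feeds the self._pcache cache.
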
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

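    # Hedged usage sketch (hypothetical hash): if no other node shares the
    # first hex digit of `node`,
    #
    #   rl.shortest(node)               # -> e.g. 'a'
    #   rl.shortest(node, minlength=4)  # -> e.g. 'a3f9'; never shorter than 4
    #
    # disambiguate() additionally lengthens any all-'f' prefix so the result
    # can never be mistaken for the working directory pseudo-node.
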
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

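    # Worked example of the windowing arithmetic above: with a chunk cache
    # size of 65536, a 100-byte read at offset 70000 is widened to
    # realoffset = 70000 & ~65535 = 65536 and
    # reallength = ((70100 + 65536) & ~65535) - 65536 = 65536,
    # so the surrounding 64k window is read once and cached for neighbors.
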
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is used, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

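    # Worked example of the inline adjustment above (assumed sizes): if rev 0
    # stores 10 bytes of data and rev 1 stores 20, then for
    # startrev = endrev = 1 the data-stream bounds are start = 10, end = 30.
    # In an inline revlog with 64-byte index entries (self._io.size == 64),
    # both bounds additionally shift by (rev + 1) * 64 because index entries
    # are interleaved with the revision data in the same file.
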
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

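    # Hedged example of the recursion above (assumed delta chain): a revision
    # whose delta parent is nullrev is a full snapshot; one stored as a delta
    # against a non-parent rev is an intermediate snapshot only if that base
    # is itself a snapshot. E.g. if rev 5 deltas against rev 3 (not a parent
    # of 5) and rev 3 deltas against nullrev, issnapshot(5) walks
    # 5 -> 3 -> nullrev and returns True; a delta against p1 or p2 returns
    # False at once (ordinary delta chain).
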
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

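    # Hedged sketch of a flag processor triple (hypothetical flag and
    # transform functions; real registration in this era goes through
    # revlog.addflagprocessor()):
    #
    #   def _readxform(rl, text):
    #       return decode(text), True    # text for callers; verify node hash
    #   def _writexform(rl, text):
    #       return encode(text), False   # raw form that actually gets stored
    #   def _rawxform(rl, rawtext):
    #       return False                 # rawtext hash is not the node hash
    #
    #   addflagprocessor(REVIDX_EXTSTORED,
    #                    (_readxform, _writexform, _rawxform))
    #
    # decode()/encode() are stand-ins for whatever the flag's transform does.
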
1734 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1735 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1735 """Check node hash integrity.
1736 """Check node hash integrity.
1736
1737
1737 Available as a function so that subclasses can extend hash mismatch
1738 Available as a function so that subclasses can extend hash mismatch
1738 behaviors as needed.
1739 behaviors as needed.
1739 """
1740 """
1740 try:
1741 try:
1741 if p1 is None and p2 is None:
1742 if p1 is None and p2 is None:
1742 p1, p2 = self.parents(node)
1743 p1, p2 = self.parents(node)
1743 if node != self.hash(text, p1, p2):
1744 if node != self.hash(text, p1, p2):
1744 revornode = rev
1745 revornode = rev
1745 if revornode is None:
1746 if revornode is None:
1746 revornode = templatefilters.short(hex(node))
1747 revornode = templatefilters.short(hex(node))
1747 raise RevlogError(_("integrity check failed on %s:%s")
1748 raise error.RevlogError(_("integrity check failed on %s:%s")
1748 % (self.indexfile, pycompat.bytestr(revornode)))
1749 % (self.indexfile, pycompat.bytestr(revornode)))
1749 except RevlogError:
1750 except error.RevlogError:
1750 if self._censorable and _censoredtext(text):
1751 if self._censorable and _censoredtext(text):
1751 raise error.CensoredNodeError(self.indexfile, node, text)
1752 raise error.CensoredNodeError(self.indexfile, node, text)
1752 raise
1753 raise
1753
1754
1754 def _enforceinlinesize(self, tr, fp=None):
1755 def _enforceinlinesize(self, tr, fp=None):
1755 """Check if the revlog is too big for inline and convert if so.
1756 """Check if the revlog is too big for inline and convert if so.
1756
1757
1757 This should be called after revisions are added to the revlog. If the
1758 This should be called after revisions are added to the revlog. If the
1758 revlog has grown too large to be an inline revlog, it will convert it
1759 revlog has grown too large to be an inline revlog, it will convert it
1759 to use multiple index and data files.
1760 to use multiple index and data files.
1760 """
1761 """
1761 tiprev = len(self) - 1
1762 tiprev = len(self) - 1
1762 if (not self._inline or
1763 if (not self._inline or
1763 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1764 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1764 return
1765 return
1765
1766
1766 trinfo = tr.find(self.indexfile)
1767 trinfo = tr.find(self.indexfile)
1767 if trinfo is None:
1768 if trinfo is None:
1768 raise RevlogError(_("%s not found in the transaction")
1769 raise error.RevlogError(_("%s not found in the transaction")
1769 % self.indexfile)
1770 % self.indexfile)
1770
1771
1771 trindex = trinfo[2]
1772 trindex = trinfo[2]
1772 if trindex is not None:
1773 if trindex is not None:
1773 dataoff = self.start(trindex)
1774 dataoff = self.start(trindex)
1774 else:
1775 else:
1775 # revlog was stripped at start of transaction, use all leftover data
1776 # revlog was stripped at start of transaction, use all leftover data
1776 trindex = len(self) - 1
1777 trindex = len(self) - 1
1777 dataoff = self.end(tiprev)
1778 dataoff = self.end(tiprev)
1778
1779
1779 tr.add(self.datafile, dataoff)
1780 tr.add(self.datafile, dataoff)
1780
1781
1781 if fp:
1782 if fp:
1782 fp.flush()
1783 fp.flush()
1783 fp.close()
1784 fp.close()
1784
1785
1785 with self._datafp('w') as df:
1786 with self._datafp('w') as df:
1786 for r in self:
1787 for r in self:
1787 df.write(self._getsegmentforrevs(r, r)[1])
1788 df.write(self._getsegmentforrevs(r, r)[1])
1788
1789
1789 with self._indexfp('w') as fp:
1790 with self._indexfp('w') as fp:
1790 self.version &= ~FLAG_INLINE_DATA
1791 self.version &= ~FLAG_INLINE_DATA
1791 self._inline = False
1792 self._inline = False
1792 io = self._io
1793 io = self._io
1793 for i in self:
1794 for i in self:
1794 e = io.packentry(self.index[i], self.node, self.version, i)
1795 e = io.packentry(self.index[i], self.node, self.version, i)
1795 fp.write(e)
1796 fp.write(e)
1796
1797
1797 # the temp file replace the real index when we exit the context
1798 # the temp file replace the real index when we exit the context
1798 # manager
1799 # manager
1799
1800
1800 tr.replace(self.indexfile, trindex * self._io.size)
1801 tr.replace(self.indexfile, trindex * self._io.size)
1801 self._chunkclear()
1802 self._chunkclear()
1802
1803
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
-            raise RevlogError(_("attempted to add linkrev -1 to %s")
+            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
-            raise RevlogError(
+            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

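# Hedged usage sketch for addrevision() above: callers pass the revision text,
# an open transaction, the linkrev and the parent nodes, and normally let the
# revlog compute the node itself. The helper name and argument names here are
# assumptions, not Mercurial API.

def store_text(rl, tr, linkrev, text, p1, p2):
    # Returns the node of the newly added (or already present) revision.
    return rl.addrevision(text, tr, linkrev, p1, p2)
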
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

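# Sketch of how the (header, data) pair returned by compress() above is meant
# to be consumed: the on-disk chunk is simply the concatenation, with '' used
# when the compressor embedded its own header and 'u' marking text stored
# uncompressed. The helper name is an assumption.

def assemble_chunk(header, data):
    # This byte string is what ultimately gets written for one revision chunk.
    return header + data
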
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
-                raise RevlogError(_('revlog decompress error: %s') %
+                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
-                raise RevlogError(_('unknown compression type %r') % t)
+                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

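# The first-byte dispatch above can be pictured as a tiny routing table; this
# standalone sketch mirrors only the three built-in cases ('x' zlib, '\0'
# raw-with-NUL, 'u' raw-with-marker) and skips the pluggable-engine lookup.

import zlib

def decompress_sketch(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)   # zlib streams carry their own header
    if t == b'\0':
        return data                    # raw data that happens to start with NUL
    if t == b'u':
        return data[1:]                # explicit "stored uncompressed" marker
    raise ValueError('unknown compression type %r' % t)
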
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
-            raise RevlogError(_("%s: attempt to add null revision") %
-                              (self.indexfile))
+            raise error.RevlogError(_("%s: attempt to add null revision") %
+                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
-            raise RevlogError(_("%s: attempt to add wdir revision") %
-                              (self.indexfile))
+            raise error.RevlogError(_("%s: attempt to add wdir revision") %
+                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

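# Sketch of the append-mode workaround described in _writeentry() above:
# explicitly seek to EOF before every write through an "a+" handle so that
# buggy platform semantics cannot misplace the data. `fh` is any writable
# file object; the helper name is an assumption.

import os

def append_safely(fh, payload):
    fh.seek(0, os.SEEK_END)    # reposition defensively before writing
    fh.write(payload)
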
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

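# Hedged wiring sketch for addgroup() above: each element of `deltas` is a
# (node, p1, p2, linknode, deltabase, delta, flags) tuple, and `linkmapper`
# translates linknodes into changelog revision numbers. The exact caller
# names here are assumptions.

def apply_group(filelog, changelog, tr, deltas):
    return filelog.addgroup(deltas, changelog.rev, tr)
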
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

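# The heart of the heuristic above is one bytes comparison right after the
# 12-byte (start, end, length) patch header; a simplified standalone sketch
# that deliberately skips the full-replacement-delta check:

import struct

def delta_adds_censor_marker(delta):
    hlen = struct.calcsize(">lll")     # size of one mdiff patch header
    marker = b"\1\ncensored:"
    return delta[hlen:hlen + len(marker)] == marker
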
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

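# Sketch of the truncation offsets computed in strip() above, assuming `rev`
# is the first revision to drop: a split revlog truncates the data and index
# files separately, while an inline revlog truncates the single file holding
# both. The helper name is an assumption.

def truncation_offsets(rl, rev):
    dataend = rl.start(rev)
    if not rl._inline:
        return dataend, rev * rl._io.size        # (datafile, indexfile)
    return None, dataend + rev * rl._io.size     # single inline file
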
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisiondeltas(self, requests):
        frev = self.rev

        prevrev = None
        for request in requests:
            node = request.node
            rev = frev(node)

            if prevrev is None:
                prevrev = self.index[rev][5]

            # Requesting a full revision.
            if request.basenode == nullid:
                baserev = nullrev
            # Requesting an explicit revision.
            elif request.basenode is not None:
                baserev = frev(request.basenode)
            # Allowing us to choose.
            else:
                p1rev, p2rev = self.parentrevs(rev)
                deltaparentrev = self.deltaparent(rev)

                # Avoid sending full revisions when delta parent is null. Pick
                # prev in that case. It's tempting to pick p1 in this case, as
                # p1 will be smaller in the common case. However, computing a
                # delta against p1 may require resolving the raw text of p1,
                # which could be expensive. The revlog caches should have prev
                # cached, meaning less CPU for delta generation. There is
                # likely room to add a flag and/or config option to control
                # this behavior.
                if deltaparentrev == nullrev and self._storedeltachains:
                    baserev = prevrev

                # Revlog is configured to use full snapshot for a reason.
                # Stick to full snapshot.
                elif deltaparentrev == nullrev:
                    baserev = nullrev

                # Pick previous when we can't be sure the base is available
                # on consumer.
                elif deltaparentrev not in (p1rev, p2rev, prevrev):
                    baserev = prevrev
                else:
                    baserev = deltaparentrev

                if baserev != nullrev and not self.candelta(baserev, rev):
                    baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if self.iscensored(baserev) or self.iscensored(rev):
                try:
                    revision = self.revision(node, raw=True)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    baserevisionsize = self.rawsize(baserev)

            elif baserev == nullrev:
                revision = self.revision(node, raw=True)
            else:
                delta = self.revdiff(baserev, rev)

            extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0

            yield revlogrevisiondelta(
                node=node,
                p1node=request.p1node,
                p2node=request.p2node,
                linknode=request.linknode,
                basenode=self.node(baserev),
                flags=self.flags(rev) | extraflags,
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the destination
            revlog would not select the same revisions for the delta. This is
            the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the same
            revisions for the delta. This mode strikes a balance between speed
            and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of execution.
            This mode can be used to recompute deltas (e.g. if the diff/delta
            algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If it is left unset, the destination revlog's current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
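
# Hedged usage sketch for clone() above: copying into a fresh destination
# revlog while forcing every delta to be recomputed. `srcrevlog`, `destrevlog`
# and the open transaction `tr` are assumed to be supplied by the caller.

def recompute_deltas(srcrevlog, destrevlog, tr):
    srcrevlog.clone(tr, destrevlog, deltareuse=srcrevlog.DELTAREUSENEVER)
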
@@ -1,941 +1,940 b''
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import absolute_import

import collections
import heapq
import struct

# import stuff from node for others to import from revlog
from ..node import (
    nullrev,
)
from ..i18n import _

from .constants import (
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)

from ..thirdparty import (
    attr,
)

from .. import (
    error,
    mdiff,
)

-RevlogError = error.RevlogError
CensoredNodeError = error.CensoredNodeError

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

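# Sketch of the density measure that drives the slicing above: payload bytes
# of the selected revisions divided by the on-disk span they cover. The helper
# name is an assumption; the real computation lives in _slicechunktodensity().

def chunk_density(revlog, revs):
    span = revlog.end(revs[-1]) - revlog.start(revs[0])
    payload = sum(revlog.length(r) for r in revs)
    return payload / float(span) if span else 1.0
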
133 def _slicechunktosize(revlog, revs, targetsize=None):
132 def _slicechunktosize(revlog, revs, targetsize=None):
134 """slice revs to match the target size
133 """slice revs to match the target size
135
134
136 This is intended to be used on chunk that density slicing selected by that
135 This is intended to be used on chunk that density slicing selected by that
137 are still too large compared to the read garantee of revlog. This might
136 are still too large compared to the read garantee of revlog. This might
138 happens when "minimal gap size" interrupted the slicing or when chain are
137 happens when "minimal gap size" interrupted the slicing or when chain are
139 built in a way that create large blocks next to each other.
138 built in a way that create large blocks next to each other.
140
139
141 >>> revlog = _testrevlog([
140 >>> revlog = _testrevlog([
142 ... 3, #0 (3)
141 ... 3, #0 (3)
143 ... 5, #1 (2)
142 ... 5, #1 (2)
144 ... 6, #2 (1)
143 ... 6, #2 (1)
145 ... 8, #3 (2)
144 ... 8, #3 (2)
146 ... 8, #4 (empty)
145 ... 8, #4 (empty)
147 ... 11, #5 (3)
146 ... 11, #5 (3)
148 ... 12, #6 (1)
147 ... 12, #6 (1)
149 ... 13, #7 (1)
148 ... 13, #7 (1)
150 ... 14, #8 (1)
149 ... 14, #8 (1)
151 ... ])
150 ... ])
152
151
153 Cases where chunk is already small enough
152 Cases where chunk is already small enough
154 >>> list(_slicechunktosize(revlog, [0], 3))
153 >>> list(_slicechunktosize(revlog, [0], 3))
155 [[0]]
154 [[0]]
156 >>> list(_slicechunktosize(revlog, [6, 7], 3))
155 >>> list(_slicechunktosize(revlog, [6, 7], 3))
157 [[6, 7]]
156 [[6, 7]]
158 >>> list(_slicechunktosize(revlog, [0], None))
157 >>> list(_slicechunktosize(revlog, [0], None))
159 [[0]]
158 [[0]]
160 >>> list(_slicechunktosize(revlog, [6, 7], None))
159 >>> list(_slicechunktosize(revlog, [6, 7], None))
161 [[6, 7]]
160 [[6, 7]]
162
161
163 cases where we need actual slicing
162 cases where we need actual slicing
164 >>> list(_slicechunktosize(revlog, [0, 1], 3))
163 >>> list(_slicechunktosize(revlog, [0, 1], 3))
165 [[0], [1]]
164 [[0], [1]]
166 >>> list(_slicechunktosize(revlog, [1, 3], 3))
165 >>> list(_slicechunktosize(revlog, [1, 3], 3))
167 [[1], [3]]
166 [[1], [3]]
168 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
167 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
169 [[1, 2], [3]]
168 [[1, 2], [3]]
170 >>> list(_slicechunktosize(revlog, [3, 5], 3))
169 >>> list(_slicechunktosize(revlog, [3, 5], 3))
171 [[3], [5]]
170 [[3], [5]]
172 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
171 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
173 [[3], [5]]
172 [[3], [5]]
174 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
173 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
175 [[5], [6, 7, 8]]
174 [[5], [6, 7, 8]]
176 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
175 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
177 [[0], [1, 2], [3], [5], [6, 7, 8]]
176 [[0], [1, 2], [3], [5], [6, 7, 8]]
178
177
179 Case with too large individual chunk (must return valid chunk)
178 Case with too large individual chunk (must return valid chunk)
180 >>> list(_slicechunktosize(revlog, [0, 1], 2))
179 >>> list(_slicechunktosize(revlog, [0, 1], 2))
181 [[0], [1]]
180 [[0], [1]]
182 >>> list(_slicechunktosize(revlog, [1, 3], 1))
181 >>> list(_slicechunktosize(revlog, [1, 3], 1))
183 [[1], [3]]
182 [[1], [3]]
184 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
183 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
185 [[3], [5]]
184 [[3], [5]]
186 """
185 """
187 assert targetsize is None or 0 <= targetsize
186 assert targetsize is None or 0 <= targetsize
188 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
187 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
189 yield revs
188 yield revs
190 return
189 return
191
190
192 startrevidx = 0
191 startrevidx = 0
193 startdata = revlog.start(revs[0])
192 startdata = revlog.start(revs[0])
194 endrevidx = 0
193 endrevidx = 0
195 iterrevs = enumerate(revs)
194 iterrevs = enumerate(revs)
196 next(iterrevs) # skip first rev.
195 next(iterrevs) # skip first rev.
197 for idx, r in iterrevs:
196 for idx, r in iterrevs:
198 span = revlog.end(r) - startdata
197 span = revlog.end(r) - startdata
199 if span <= targetsize:
198 if span <= targetsize:
200 endrevidx = idx
199 endrevidx = idx
201 else:
200 else:
202 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
201 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
203 if chunk:
202 if chunk:
204 yield chunk
203 yield chunk
205 startrevidx = idx
204 startrevidx = idx
206 startdata = revlog.start(r)
205 startdata = revlog.start(r)
207 endrevidx = idx
206 endrevidx = idx
208 yield _trimchunk(revlog, revs, startrevidx)
207 yield _trimchunk(revlog, revs, startrevidx)
209
208
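# To illustrate how the two passes compose (a sketch, not code from this
# module): slicechunk first groups revisions by density, then each dense
# group is re-cut by _slicechunktosize so that no single read exceeds
# `targetsize`. With the _testrevlog above, revisions [0..8] at a
# targetsize of 3 come out as [[0], [1, 2], [3], [5], [6, 7, 8]], i.e.
# five small reads instead of one 14 byte read.
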
def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

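# A worked example of the density computation above, with made-up numbers:
# if the selected revisions hold 16 bytes of payload spread over a 64 byte
# span on disk, density starts at 16/64 = 0.25. With targetdensity=0.5,
# gaps are removed largest-first until 16/readdata >= 0.5, i.e. until at
# least 32 bytes worth of gaps have been skipped; the revs are then cut at
# the indices of the removed gaps.
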
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
-            raise RevlogError(_('node %s is not censored') % expectednode)
+            raise error.RevlogError(_('node %s is not censored') % expectednode)
    except CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

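# Note on the header check above: mdiff deltas are sequences of
# (start, end, length) ">lll" hunks, each followed by replacement text, so
# a delta whose single hunk is replacediffheader(rawsize(base), len(rest))
# replaces the entire base, and the fulltext is simply the remaining bytes.
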
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()

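# For reference, the fields above: `distance` is the byte span from the
# chain base to the new end of data (what may need reading), `deltalen` the
# size of this one compressed delta, `data` the (header, payload) pair from
# compress(), `base` the delta parent, `chainbase`/`chainlen` describe the
# resulting chain, `compresseddeltalen` the cumulated size of deltas to
# apply, and `snapshotdepth` is None for regular deltas or the depth of an
# (intermediate) snapshot.
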
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    if revlog._sparserevlog:
        # As sparse-read will be used, we can consider that the distance,
        # instead of being the span of the whole chunk,
        # is the span of the largest read chunk
        base = deltainfo.base

        if base != nullrev:
            deltachain = revlog._deltachain(base)[0]
        else:
            deltachain = []

        # search for the first non-snapshot revision
        for idx, r in enumerate(deltachain):
            if not revlog.issnapshot(r):
                break
        deltachain = deltachain[idx:]
        chunks = slicechunk(revlog, deltachain, deltainfo)
        all_span = [segmentspan(revlog, revs, deltainfo)
                    for revs in chunks]
        distance = max(all_span)
    else:
        distance = deltainfo.distance

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = distance # ensure the conditional passes
    maxdist = max(maxdist, defaultmax)
    if revlog._sparserevlog and maxdist < revlog._srmingapsize:
        # In multiple places, we ignore irrelevant data ranges below a
        # certain size. We also apply this tradeoff here and relax the span
        # constraint for small enough content.
        maxdist = revlog._srmingapsize

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    if maxdist < distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of the deltas gets larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of deltas in the chain gets too high.
    if (revlog._maxchainlen
            and revlog._maxchainlen < deltainfo.chainlen):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit. The
    #   limit exists to prevent endless chains of intermediate deltas from
    #   being created.
    if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (deltainfo.snapshotdepth
            and revlog.length(deltainfo.base) < deltainfo.deltalen):
        return False

    return True

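# A worked example of the bounds above (made-up numbers): for a 1000 byte
# text with `_maxdeltachainspan` set to 2000, maxdist becomes
# max(2000, 4 * 1000) = 4000, so a candidate reading a 5000 byte span is
# rejected by the read-span check; a 1200 byte delta is rejected by the
# size check; and, assuming LIMIT_DELTA2TEXT is 2, a chain whose cumulated
# deltas exceed 2000 bytes is rejected as well.
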
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revisions we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performance. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None

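# Sketch of how a caller drives the generator protocol above (hypothetical
# `pick_best`; finddeltainfo below is the real caller and has this shape):
#
#   groups = _candidategroups(rl, textlen, p1r, p2r, cachedelta)
#   candidates = next(groups)
#   while candidates is not None:
#       best = pick_best(candidates)          # may be None
#       if best is not None:
#           candidates = groups.send(best)    # ask for refined candidates
#       else:
#           candidates = next(groups)
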
def _findsnapshots(revlog, cache, start_rev):
    """find snapshots from start_rev to tip"""
    deltaparent = revlog.deltaparent
    issnapshot = revlog.issnapshot
    for rev in revlog.revs(start_rev):
        if issnapshot(rev):
            cache[deltaparent(rev)].append(rev)

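# After _findsnapshots(revlog, cache, 0), `cache` maps each snapshot's delta
# parent to its snapshot children, e.g. {nullrev: [0, 7], 0: [3]} for a
# hypothetical revlog where 0 and 7 are full snapshots and 3 is an
# intermediate snapshot based on 0.
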
def _refinedgroups(revlog, p1, p2, cachedelta):
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling source-delta reuse is useful when
    # we want to make sure we recompute "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None

def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # that we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from highest to lowest level. High level ones are more
        # likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of the base we consider, would
            # be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chance
                # of being a valid base since our ancestors had to create a
                # new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)

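# Summary of the emission order above: the parent(s) first (the closest
# parent alone when only one delta per group is wanted), then, for sparse
# revlogs, snapshot candidates from the highest level down, then the full
# snapshots on nullrev, and finally `prev` as a last resort for non-sparse
# revlogs.
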
class deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies
            currentbase = cachebase
            while (currentbase != nullrev
                    and currentbase != base
                    and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
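
# Typical driving code, as a sketch (the actual call site lives in
# revlog.py's _addrevision path and is not shown in this hunk; names below
# are illustrative):
#
#   dc = deltacomputer(rl)
#   deltainfo = dc.finddeltainfo(revinfo, fh)
#
# finddeltainfo() always returns something usable: the best refined delta
# that passed isgooddeltainfo(), or full-snapshot info otherwise.
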
@@ -1,750 +1,750 b''
# simplestorerepo.py - Extension that swaps in alternate repository storage.
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# To use this with the test suite:
#
#   $ HGREPOFEATURES="simplestore" ./run-tests.py \
#       --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py

from __future__ import absolute_import

import stat

from mercurial.i18n import _
from mercurial.node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from mercurial.thirdparty import (
    attr,
    cbor,
)
from mercurial import (
    ancestor,
    bundlerepo,
    error,
    extensions,
    localrepo,
    mdiff,
    pycompat,
    repository,
    revlog,
    store,
    verify,
)
from mercurial.utils import (
    interfaceutil,
)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

REQUIREMENT = 'testonly-simplestore'

def validatenode(node):
    if isinstance(node, int):
        raise ValueError('expected node; got int')

    if len(node) != 20:
        raise ValueError('expected 20 byte node')

def validaterev(rev):
    if not isinstance(rev, int):
        raise ValueError('expected int')

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True, frozen=True)
class simplestorerevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    linknode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()

@interfaceutil.implementer(repository.ifilestorage)
class filestorage(object):
    """Implements storage for a tracked path.

    Data is stored in the VFS in a directory corresponding to the tracked
    path.

    Index data is stored in an ``index`` file using CBOR.

    Fulltext data is stored in files named after the node.
    """

    def __init__(self, svfs, path):
        self._svfs = svfs
        self._path = path

        self._storepath = b'/'.join([b'data', path])
        self._indexpath = b'/'.join([self._storepath, b'index'])

        indexdata = self._svfs.tryread(self._indexpath)
        if indexdata:
            indexdata = cbor.loads(indexdata)

        self._indexdata = indexdata or []
        self._indexbynode = {}
        self._indexbyrev = {}
        self.index = []
        self._refreshindex()

        # This is used by changegroup code :/
        self._generaldelta = True

        self.version = 1

    def _refreshindex(self):
        self._indexbynode.clear()
        self._indexbyrev.clear()
        self.index = []

        for i, entry in enumerate(self._indexdata):
            self._indexbynode[entry[b'node']] = entry
            self._indexbyrev[i] = entry

        self._indexbynode[nullid] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        self._indexbyrev[nullrev] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        for i, entry in enumerate(self._indexdata):
            p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))

            # start, length, rawsize, chainbase, linkrev, p1, p2, node
            self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
                               entry[b'node']))

        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

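    # A sample decoded index entry, for illustration (hypothetical values):
    #
    #   {b'node': b'\x01' * 20, b'p1': nullid, b'p2': nullid,
    #    b'linkrev': 0, b'flags': 0}
    #
    # i.e. one CBOR map per revision, stored in revision order.
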
145 def __len__(self):
145 def __len__(self):
146 return len(self._indexdata)
146 return len(self._indexdata)
147
147
148 def __iter__(self):
148 def __iter__(self):
149 return iter(range(len(self)))
149 return iter(range(len(self)))
150
150
151 def revs(self, start=0, stop=None):
151 def revs(self, start=0, stop=None):
152 step = 1
152 step = 1
153 if stop is not None:
153 if stop is not None:
154 if start > stop:
154 if start > stop:
155 step = -1
155 step = -1
156
156
157 stop += step
157 stop += step
158 else:
158 else:
159 stop = len(self)
159 stop = len(self)
160
160
161 return range(start, stop, step)
161 return range(start, stop, step)
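
    # Illustrative behaviour of revs() (not in the original source): with
    # three revisions, revs() yields 0, 1, 2, while revs(start=2, stop=0)
    # yields 2, 1, 0, because stop is made inclusive and the step flips to
    # -1 when start > stop.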

    def parents(self, node):
        validatenode(node)

        if node not in self._indexbynode:
            raise KeyError('unknown node')

        entry = self._indexbynode[node]

        return entry[b'p1'], entry[b'p2']

    def parentrevs(self, rev):
        p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
        return self.rev(p1), self.rev(p2)

    def rev(self, node):
        validatenode(node)

        try:
            self._indexbynode[node]
        except KeyError:
            raise error.LookupError(node, self._indexpath, _('no node'))

        for rev, entry in self._indexbyrev.items():
            if entry[b'node'] == node:
                return rev

        raise error.ProgrammingError('this should not occur')

    def node(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'node']

    def lookup(self, node):
        if isinstance(node, int):
            return self.node(node)

        if len(node) == 20:
            self.rev(node)
            return node

        try:
            rev = int(node)
            if '%d' % rev != node:
                raise ValueError

            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError

            return self.node(rev)
        except (ValueError, OverflowError):
            pass

        if len(node) == 40:
            try:
                rawnode = bin(node)
                self.rev(rawnode)
                return rawnode
            except TypeError:
                pass

        raise error.LookupError(node, self._path, _('invalid lookup input'))
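
    # A sketch of the inputs lookup() accepts (illustrative only):
    #
    #   store.lookup(0)           -> node for rev 0 (integer path)
    #   store.lookup('0')         -> same, via the decimal-string path
    #   store.lookup('-1')        -> node for the last revision
    #   store.lookup(node)        -> node itself, validated (20 raw bytes)
    #   store.lookup(hex(node))   -> raw node, via the 40-char hex path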

    def linkrev(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'linkrev']

    def flags(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'flags']

    def deltaparent(self, rev):
        validaterev(rev)

        p1node = self.parents(self.node(rev))[0]
        return self.rev(p1node)

    def _candelta(self, baserev, rev):
        validaterev(baserev)
        validaterev(rev)

        if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False

        return True

    def rawsize(self, rev):
        validaterev(rev)
        node = self.node(rev)
        return len(self.revision(node, raw=True))

    def _processflags(self, text, flags, operation, raw=False):
        if flags == 0:
            return text, True

        if flags & ~revlog.REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~revlog.REVIDX_KNOWN_FLAGS))

        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = revlog.REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in revlog._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = revlog._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash
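
    # A sketch of the processor contract assumed above (names hypothetical):
    # a registered processor is a (readtransform, writetransform,
    # rawtransform) triple. A no-op processor for some flag could look like:
    #
    #   def _readnoop(self, text):
    #       return text, True       # (new text, validate hash afterwards?)
    #
    #   def _writenoop(self, text):
    #       return text, True
    #
    #   def _rawnoop(self, text):
    #       return True             # can the rawtext hash be verified?
    #
    #   revlog._flagprocessors[SOMEFLAG] = (_readnoop, _writenoop, _rawnoop)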

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != revlog.hash(text, p1, p2):
            raise error.RevlogError(_("integrity check failed on %s") %
                                    self._path)
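
    # For reference, revlog.hash() is sha1(min(p1, p2) + max(p1, p2) + text),
    # so a node commits to both the content and its position in the DAG.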

    def revision(self, node, raw=False):
        validatenode(node)

        if node == nullid:
            return b''

        rev = self.rev(node)
        flags = self.flags(rev)

        path = b'/'.join([self._storepath, hex(node)])
        rawtext = self._svfs.read(path)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def read(self, node):
        validatenode(node)

        revision = self.revision(node)

        if not revision.startswith(b'\1\n'):
            return revision

        start = revision.index(b'\1\n', 2)
        return revision[start + 2:]
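
    # Illustrative example of the metadata framing handled above (values
    # hypothetical): for a stored fulltext of
    #
    #   b'\1\ncopy: a.txt\ncopyrev: <40 hex digits>\n\1\nreal content'
    #
    # read() returns b'real content', while revision() returns the whole
    # framed text.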

    def renamed(self, node):
        validatenode(node)

        if self.parents(node)[0] != nullid:
            return False

        fulltext = self.revision(node)
        m = revlog.parsemeta(fulltext)[0]

        if m and 'copy' in m:
            return m['copy'], bin(m['copyrev'])

        return False

    def cmp(self, node, text):
        validatenode(node)

        t = text

        if text.startswith(b'\1\n'):
            t = b'\1\n\1\n' + text

        p1, p2 = self.parents(node)

        if revlog.hash(t, p1, p2) == node:
            return False

        if self.iscensored(self.rev(node)):
            return text != b''

        if self.renamed(node):
            t2 = self.read(node)
            return t2 != text

        return True
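
    # Why cmp() prepends b'\1\n\1\n' above: a text that itself starts with
    # the metadata marker is stored behind an empty metadata block (see
    # add() below), so the node hash must be computed over that framed form.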

    def size(self, rev):
        validaterev(rev)

        node = self._indexbyrev[rev][b'node']

        if self.renamed(node):
            return len(self.read(node))

        if self.iscensored(rev):
            return 0

        return len(self.revision(node))

    def iscensored(self, rev):
        validaterev(rev)

        return self.flags(rev) & revlog.REVIDX_ISCENSORED

    def commonancestorsheads(self, a, b):
        validatenode(a)
        validatenode(b)

        a = self.rev(a)
        b = self.rev(b)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        # This is a copy of revlog.descendants()
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    # Required by verify.
    def files(self):
        entries = self._svfs.listdir(self._storepath)

        # Strip out undo.backup.* files created as part of transaction
        # recording.
        entries = [f for f in entries if not f.startswith('undo.backup.')]

        return [b'/'.join((self._storepath, f)) for f in entries]

    # Required by verify.
    def checksize(self):
        return 0, 0

    def add(self, text, meta, transaction, linkrev, p1, p2):
        if meta or text.startswith(b'\1\n'):
            text = revlog.packmeta(meta, text)

        return self.addrevision(text, transaction, linkrev, p1, p2)

    def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
                    flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
        validatenode(p1)
        validatenode(p2)

        if flags:
            node = node or revlog.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        node = node or revlog.hash(text, p1, p2)

        if node in self._indexbynode:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
                                    flags)

    def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
        transaction.addbackup(self._indexpath)

        path = b'/'.join([self._storepath, hex(node)])

        self._svfs.write(path, rawtext)

        self._indexdata.append({
            b'node': node,
            b'p1': p1,
            b'p2': p2,
            b'linkrev': link,
            b'flags': flags,
        })

        self._reflectindexupdate()

        return node

    def _reflectindexupdate(self):
        self._refreshindex()
        self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))
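
    # Note that persistence is deliberately naive: every added revision
    # rewrites the entire CBOR index. That is O(index size) per write, which
    # is acceptable for a test-oriented store.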

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        nodes = []

        transaction.addbackup(self._indexpath)

        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            linkrev = linkmapper(linknode)
            flags = flags or revlog.REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self._indexbynode:
                continue

            # Need to resolve the fulltext from the delta base.
            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
            else:
                text = mdiff.patch(self.revision(deltabase), delta)

            self._addrawrevision(node, text, transaction, linkrev, p1, p2,
                                 flags)

            if addrevisioncb:
                addrevisioncb(self, node)

        return nodes
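
    # Since this store only keeps fulltexts, incoming deltas are resolved
    # eagerly above. The resolution step in isolation (a sketch):
    #
    #   base = b'' if deltabase == nullid else self.revision(deltabase)
    #   fulltext = mdiff.patch(base, delta)
    #
    # i.e. a delta against the null revision is a patch onto the empty
    # string.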

    def revdiff(self, rev1, rev2):
        validaterev(rev1)
        validaterev(rev2)

        node1 = self.node(rev1)
        node2 = self.node(rev2)

        return mdiff.textdiff(self.revision(node1, raw=True),
                              self.revision(node2, raw=True))

    def emitrevisiondeltas(self, requests):
        for request in requests:
            node = request.node
            rev = self.rev(node)

            if request.basenode == nullid:
                baserev = nullrev
            elif request.basenode is not None:
                baserev = self.rev(request.basenode)
            else:
                # This is a test extension and we can do simple things
                # for choosing a delta parent.
                baserev = self.deltaparent(rev)

            if baserev != nullrev and not self._candelta(baserev, rev):
                baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if self.iscensored(baserev) or self.iscensored(rev):
                try:
                    revision = self.revision(node, raw=True)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    baserevisionsize = self.rawsize(baserev)

            elif baserev == nullrev:
                revision = self.revision(node, raw=True)
            else:
                delta = self.revdiff(baserev, rev)

            extraflags = revlog.REVIDX_ELLIPSIS if request.ellipsis else 0

            yield simplestorerevisiondelta(
                node=node,
                p1node=request.p1node,
                p2node=request.p2node,
                linknode=request.linknode,
                basenode=self.node(baserev),
                flags=self.flags(rev) | extraflags,
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

    def headrevs(self):
        # Assume all revisions are heads by default.
        revishead = {rev: True for rev in self._indexbyrev}

        for rev, entry in self._indexbyrev.items():
            # Unset head flag for all seen parents.
            revishead[self.rev(entry[b'p1'])] = False
            revishead[self.rev(entry[b'p2'])] = False

        return [rev for rev, ishead in sorted(revishead.items())
                if ishead]
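
    # Worked example (illustrative): for a history 0 <- 1 <- 2 with an extra
    # branch 1 <- 3, revisions 0 and 1 appear as parents and lose their head
    # flag, so headrevs() returns [2, 3]. The nullrev sentinel in
    # _indexbyrev is unset the same way as soon as any root revision names
    # nullid as a parent.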

    def heads(self, start=None, stop=None):
        # This is copied from revlog.py.
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def getstrippoint(self, minlink):
        # This is largely a copy of revlog.getstrippoint().
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
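
    # Worked example (illustrative): for a linear store whose revisions have
    # linkrevs [0, 1, 2], getstrippoint(1) pops rev 2 (linkrev 2 >= 1) and
    # rev 1 (linkrev 1 >= 1), stops once no pending linkrevs reach minlink,
    # and returns (1, set()): strip from revision 1 up, nothing broken.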

    def strip(self, minlink, transaction):
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()
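
    # Note that strip() only truncates the index; orphaned fulltext files
    # remain in the store, which is why the verifier wrapper below disables
    # warnorphanstorefiles.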

def issimplestorefile(f, kind, st):
    if kind != stat.S_IFREG:
        return False

    if store.isrevlog(f, kind, st):
        return False

    # Ignore transaction undo files.
    if f.startswith('undo.'):
        return False

    # Otherwise assume it belongs to the simple store.
    return True

class simplestore(store.encodedstore):
    def datafiles(self):
        for x in super(simplestore, self).datafiles():
            yield x

        # Supplement with non-revlog files.
        extrafiles = self._walk('data', True, filefilter=issimplestorefile)

        for unencoded, encoded, size in extrafiles:
            try:
                unencoded = store.decodefilename(unencoded)
            except KeyError:
                unencoded = None

            yield unencoded, encoded, size

def reposetup(ui, repo):
    if not repo.local():
        return

    if isinstance(repo, bundlerepo.bundlerepository):
        raise error.Abort(_('cannot use simple store with bundlerepo'))

    class simplestorerepo(repo.__class__):
        def file(self, f):
            return filestorage(self.svfs, f)

    repo.__class__ = simplestorerepo

def featuresetup(ui, supported):
    supported.add(REQUIREMENT)

def newreporequirements(orig, ui):
    """Modifies default requirements for new repos to use the simple store."""
    requirements = orig(ui)

    # These requirements are only used to affect creation of the store
    # object. We have our own store. So we can remove them.
    # TODO do this once we feel like taking the test hit.
    #if 'fncache' in requirements:
    #    requirements.remove('fncache')
    #if 'dotencode' in requirements:
    #    requirements.remove('dotencode')

    requirements.add(REQUIREMENT)

    return requirements
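
# Illustratively (entries other than REQUIREMENT are the stock defaults and
# may vary by version), a repo created with this extension enabled would
# have a .hg/requires along the lines of:
#
#   revlogv1
#   store
#   fncache
#   dotencode
#   <REQUIREMENT>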

def makestore(orig, requirements, path, vfstype):
    if REQUIREMENT not in requirements:
        return orig(requirements, path, vfstype)

    return simplestore(path, vfstype)

def verifierinit(orig, self, *args, **kwargs):
    orig(self, *args, **kwargs)

    # We don't care that files in the store don't align with what is
    # advertised. So suppress these warnings.
    self.warnorphanstorefiles = False

def extsetup(ui):
    localrepo.featuresetupfuncs.add(featuresetup)

    extensions.wrapfunction(localrepo, 'newreporequirements',
                            newreporequirements)
    extensions.wrapfunction(store, 'store', makestore)
    extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
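
# To experiment with this extension (a sketch; the path is hypothetical and
# REQUIREMENT is defined earlier in this file):
#
#   [extensions]
#   simplestore = /path/to/this/extension.py
#
# Repositories created while it is enabled gain REQUIREMENT via
# newreporequirements() above and route per-file storage through
# filestorage.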