storageutil: new module for storage primitives (API)...
Gregory Szorc
r39913:f8eb71f9 default
mercurial/utils/storageutil.py (new file)
@@ -0,0 +1,41 @@
+# storageutil.py - Storage functionality agnostic of backend implementation.
+#
+# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import hashlib
+
+from ..node import (
+    nullid,
+)
+
+_nullhash = hashlib.sha1(nullid)
+
+def hashrevisionsha1(text, p1, p2):
+    """Compute the SHA-1 for revision data and its parents.
+
+    This hash combines both the current file contents and its history
+    in a manner that makes it easy to distinguish nodes with the same
+    content in the revision graph.
+    """
+    # As of now, if one of the parent nodes is null, p2 is null
+    if p2 == nullid:
+        # deep copy of a hash is faster than creating one
+        s = _nullhash.copy()
+        s.update(p1)
+    else:
+        # none of the parent nodes are nullid
+        if p1 < p2:
+            a = p1
+            b = p2
+        else:
+            a = p2
+            b = p1
+        s = hashlib.sha1(a)
+        s.update(b)
+    s.update(text)
+    return s.digest()
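For reference, the node hash above is plain SHA-1 over the two parent nodes
(smaller parent first; nullid, being twenty zero bytes, always sorts first)
followed by the revision text. A minimal standalone sketch of the same
computation, using only hashlib (nullid is the twenty-zero-byte null node, as
defined in mercurial.node):

import hashlib

nullid = b"\0" * 20  # Mercurial's null node ID: twenty zero bytes

def hashrevisionsha1(text, p1, p2):
    # Hash the smaller parent node, then the larger, then the text. Since
    # nullid is all zeros, a revision with one null parent hashes
    # nullid + p1 + text, which is what the p2 == nullid fast path above
    # computes via the precomputed _nullhash.
    a, b = (p1, p2) if p1 < p2 else (p2, p1)
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

# A root revision (both parents null) therefore hashes as:
node = hashrevisionsha1(b"hello\n", nullid, nullid)
assert node == hashlib.sha1(nullid + nullid + b"hello\n").digest()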

mercurial/revlog.py
@@ -1,2755 +1,2728 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import absolute_import
 
 import collections
 import contextlib
 import errno
-import hashlib
 import os
 import re
 import struct
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullhex,
     nullid,
     nullrev,
     short,
     wdirfilenodeids,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from .revlogutils.constants import (
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_ISCENSORED,
     REVIDX_KNOWN_FLAGS,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
 )
 from .thirdparty import (
     attr,
 )
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     repository,
     templatefilters,
     util,
 )
 from .revlogutils import (
     deltas as deltautil,
 )
 from .utils import (
     interfaceutil,
+    storageutil,
     stringutil,
 )
 
 # blanked usage of all the names to prevent pyflakes constraints
 # We need these names available in the module for extensions.
 REVLOGV0
 REVLOGV1
 REVLOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_KNOWN_FLAGS
 REVIDX_RAWTEXT_CHANGING_FLAGS
 
 parsers = policy.importmod(r'parsers')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576
 
 # Store flag processors (cf. 'addflagprocessor()' to register)
 _flagprocessors = {
     REVIDX_ISCENSORED: None,
 }
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False
 
 def ellipsiswriteprocessor(rl, text):
     return text, False
 
 def ellipsisrawprocessor(rl, text):
     return False
 
 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )
 
 _mdre = re.compile('\1\n')
 def parsemeta(text):
     """return (metadatadict, metadatasize)"""
     # text can be buffer, so we can't use .startswith or .index
     if text[:2] != '\1\n':
         return None, None
     s = _mdre.search(text, 2).start()
     mtext = text[2:s]
     meta = {}
     for l in mtext.splitlines():
         k, v = l.split(": ", 1)
         meta[k] = v
     return meta, (s + 2)
 
 def packmeta(meta, text):
     keys = sorted(meta)
     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
     return "\1\n%s\1\n%s" % (metatext, text)
 
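The two helpers above frame optional file metadata (e.g. copy information)
ahead of the revision text between "\x01\n" markers. A quick illustrative
round trip of that framing in plain Python (the 'copy'/'copyrev' keys here
are just example metadata):

meta = {'copy': 'a.txt', 'copyrev': '0' * 40}
metatext = "".join("%s: %s\n" % (k, meta[k]) for k in sorted(meta))
packed = "\x01\n%s\x01\n%s" % (metatext, "file content\n")

# parsemeta() locates the closing "\x01\n" marker and splits the
# "key: value" lines back into a dict; the text begins at offset s + 2.
s = packed.index("\x01\n", 2)
parsed = dict(l.split(": ", 1) for l in packed[2:s].splitlines())
assert parsed == meta
assert packed[s + 2:] == "file content\n"
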
 def _censoredtext(text):
     m, offs = parsemeta(text)
     return m and "censored" in m
 
 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.
 
     Invariant:
     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
       and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
     - Only one flag processor can be registered on a specific flag.
     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
       following signatures:
           - (read)  f(self, rawtext) -> text, bool
           - (write) f(self, text) -> rawtext, bool
           - (raw)   f(self, rawtext) -> bool
     "text" is presented to the user. "rawtext" is stored in revlog data, not
     directly visible to the user.
     The boolean returned by these transforms is used to determine whether
     the returned text can be used for hash integrity checking. For example,
     if "write" returns False, then "text" is used to generate hash. If
     "write" returns True, that basically means "rawtext" returned by "write"
     should be used to generate hash. Usually, "write" and "read" return
     different booleans. And "raw" returns the same boolean as "write".
 
     Note: The 'raw' transform is used for changegroup generation and in some
     debug commands. In this case the transform only indicates whether the
     contents can be used for hash integrity checks.
     """
     if not flag & REVIDX_KNOWN_FLAGS:
         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
         raise error.ProgrammingError(msg)
     if flag not in REVIDX_FLAGS_ORDER:
         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
         raise error.ProgrammingError(msg)
     if flag in _flagprocessors:
         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
         raise error.Abort(msg)
     _flagprocessors[flag] = processor
 
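As a hedged sketch of what a registration satisfying the invariants above
could look like (assuming REVIDX_EXTSTORED is still unclaimed in this
installation; an extension such as lfs would normally own it), here is an
identity processor whose transforms keep the text usable for hash checks:

from mercurial import revlog

def _identityread(rl, rawtext):
    # rawtext -> text; True: the result is usable for hash verification
    return rawtext, True

def _identitywrite(rl, text):
    # text -> rawtext; True: the hash should be computed over rawtext
    return text, True

def _identityraw(rl, rawtext):
    # raw contents are usable for hash integrity checks
    return True

revlog.addflagprocessor(revlog.REVIDX_EXTSTORED,
                        (_identityread, _identitywrite, _identityraw))
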
 def getoffset(q):
     return int(q >> 16)
 
 def gettype(q):
     return int(q & 0xFFFF)
 
 def offset_type(offset, type):
     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)
 
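The three helpers above pack a 48-bit data offset and 16 flag bits into the
first field of an index entry. A small round trip showing the encoding
(flags = 1 << 15 is used purely as an illustrative flag value; the real
constants live in mercurial.revlogutils.constants):

offset = 123456
flags = 1 << 15
packed = (offset << 16) | flags        # what offset_type() computes
assert packed >> 16 == offset          # getoffset()
assert packed & 0xFFFF == flags        # gettype()
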
-_nullhash = hashlib.sha1(nullid)
-
-def hash(text, p1, p2):
-    """generate a hash from the given text and its parent hashes
-
-    This hash combines both the current file contents and its history
-    in a manner that makes it easy to distinguish nodes with the same
-    content in the revision graph.
-    """
-    # As of now, if one of the parent nodes is null, p2 is null
-    if p2 == nullid:
-        # deep copy of a hash is faster than creating one
-        s = _nullhash.copy()
-        s.update(p1)
-    else:
-        # none of the parent nodes are nullid
-        if p1 < p2:
-            a = p1
-            b = p2
-        else:
-            a = p2
-            b = p1
-        s = hashlib.sha1(a)
-        s.update(b)
-    s.update(text)
-    return s.digest()
-
 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node:       expected hash of the revision
     p1, p2:     parent revs of the revision
     btext:      built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags:      flags associated to the revision storage
 
     One of btext[0] or cachedelta must be set.
     """
     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()
 
 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta(object):
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     linknode = attr.ib(default=None)
 
 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)
 
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack
 
 class revlogoldindex(list):
     def __getitem__(self, i):
         if i == -1:
             return (0, 0, 0, -1, -1, -1, -1, nullid)
         return list.__getitem__(self, i)
 
 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size
 
     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1
 
         return revlogoldindex(index), nodemap, None
 
     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise error.RevlogError(_('index entry flags need revlog '
                                       'version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)
 
 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7fffffff
 
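From the format strings above, a v0 index entry is 4 * 4 + 3 * 20 = 76 bytes
and a v1 ("ng") entry is 8 + 6 * 4 + 20 + 12 = 64 bytes (the 12 pad bytes
leave room for future 32-byte node IDs, per the comment). A quick check with
the same struct formats:

import struct

indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatng = struct.Struct(">Qiiiiii20s12x")
assert indexformatv0.size == 76
assert indexformatng.size == 64

# Note that for rev 0, revlogio.packentry() below replaces the first 4 bytes
# of the 64-byte entry with the revlog version header (">I"); parseindex()
# recovers the version from those same bytes when the file is read back.
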
335 class revlogio(object):
308 class revlogio(object):
336 def __init__(self):
309 def __init__(self):
337 self.size = indexformatng.size
310 self.size = indexformatng.size
338
311
339 def parseindex(self, data, inline):
312 def parseindex(self, data, inline):
340 # call the C implementation to parse the index data
313 # call the C implementation to parse the index data
341 index, cache = parsers.parse_index2(data, inline)
314 index, cache = parsers.parse_index2(data, inline)
342 return index, getattr(index, 'nodemap', None), cache
315 return index, getattr(index, 'nodemap', None), cache
343
316
344 def packentry(self, entry, node, version, rev):
317 def packentry(self, entry, node, version, rev):
345 p = indexformatng_pack(*entry)
318 p = indexformatng_pack(*entry)
346 if rev == 0:
319 if rev == 0:
347 p = versionformat_pack(version) + p[4:]
320 p = versionformat_pack(version) + p[4:]
348 return p
321 return p
349
322
350 class revlog(object):
323 class revlog(object):
351 """
324 """
352 the underlying revision storage object
325 the underlying revision storage object
353
326
354 A revlog consists of two parts, an index and the revision data.
327 A revlog consists of two parts, an index and the revision data.
355
328
356 The index is a file with a fixed record size containing
329 The index is a file with a fixed record size containing
357 information on each revision, including its nodeid (hash), the
330 information on each revision, including its nodeid (hash), the
358 nodeids of its parents, the position and offset of its data within
331 nodeids of its parents, the position and offset of its data within
359 the data file, and the revision it's based on. Finally, each entry
332 the data file, and the revision it's based on. Finally, each entry
360 contains a linkrev entry that can serve as a pointer to external
333 contains a linkrev entry that can serve as a pointer to external
361 data.
334 data.
362
335
363 The revision data itself is a linear collection of data chunks.
336 The revision data itself is a linear collection of data chunks.
364 Each chunk represents a revision and is usually represented as a
337 Each chunk represents a revision and is usually represented as a
365 delta against the previous chunk. To bound lookup time, runs of
338 delta against the previous chunk. To bound lookup time, runs of
366 deltas are limited to about 2 times the length of the original
339 deltas are limited to about 2 times the length of the original
367 version data. This makes retrieval of a version proportional to
340 version data. This makes retrieval of a version proportional to
368 its size, or O(1) relative to the number of revisions.
341 its size, or O(1) relative to the number of revisions.
369
342
370 Both pieces of the revlog are written to in an append-only
343 Both pieces of the revlog are written to in an append-only
371 fashion, which means we never need to rewrite a file to insert or
344 fashion, which means we never need to rewrite a file to insert or
372 remove data, and can use some simple techniques to avoid the need
345 remove data, and can use some simple techniques to avoid the need
373 for locking while reading.
346 for locking while reading.
374
347
375 If checkambig, indexfile is opened with checkambig=True at
348 If checkambig, indexfile is opened with checkambig=True at
376 writing, to avoid file stat ambiguity.
349 writing, to avoid file stat ambiguity.
377
350
378 If mmaplargeindex is True, and an mmapindexthreshold is set, the
351 If mmaplargeindex is True, and an mmapindexthreshold is set, the
379 index will be mmapped rather than read if it is larger than the
352 index will be mmapped rather than read if it is larger than the
380 configured threshold.
353 configured threshold.
381
354
382 If censorable is True, the revlog can have censored revisions.
355 If censorable is True, the revlog can have censored revisions.
383 """
356 """
384 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
357 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
385 mmaplargeindex=False, censorable=False):
358 mmaplargeindex=False, censorable=False):
386 """
359 """
387 create a revlog object
360 create a revlog object
388
361
389 opener is a function that abstracts the file opening operation
362 opener is a function that abstracts the file opening operation
390 and can be used to implement COW semantics or the like.
363 and can be used to implement COW semantics or the like.
391 """
364 """
392 self.indexfile = indexfile
365 self.indexfile = indexfile
393 self.datafile = datafile or (indexfile[:-2] + ".d")
366 self.datafile = datafile or (indexfile[:-2] + ".d")
394 self.opener = opener
367 self.opener = opener
395 # When True, indexfile is opened with checkambig=True at writing, to
368 # When True, indexfile is opened with checkambig=True at writing, to
396 # avoid file stat ambiguity.
369 # avoid file stat ambiguity.
397 self._checkambig = checkambig
370 self._checkambig = checkambig
398 self._censorable = censorable
371 self._censorable = censorable
399 # 3-tuple of (node, rev, text) for a raw revision.
372 # 3-tuple of (node, rev, text) for a raw revision.
400 self._cache = None
373 self._cache = None
401 # Maps rev to chain base rev.
374 # Maps rev to chain base rev.
402 self._chainbasecache = util.lrucachedict(100)
375 self._chainbasecache = util.lrucachedict(100)
403 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
376 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
404 self._chunkcache = (0, '')
377 self._chunkcache = (0, '')
405 # How much data to read and cache into the raw revlog data cache.
378 # How much data to read and cache into the raw revlog data cache.
406 self._chunkcachesize = 65536
379 self._chunkcachesize = 65536
407 self._maxchainlen = None
380 self._maxchainlen = None
408 self._deltabothparents = True
381 self._deltabothparents = True
409 self.index = []
382 self.index = []
410 # Mapping of partial identifiers to full nodes.
383 # Mapping of partial identifiers to full nodes.
411 self._pcache = {}
384 self._pcache = {}
412 # Mapping of revision integer to full node.
385 # Mapping of revision integer to full node.
413 self._nodecache = {nullid: nullrev}
386 self._nodecache = {nullid: nullrev}
414 self._nodepos = None
387 self._nodepos = None
415 self._compengine = 'zlib'
388 self._compengine = 'zlib'
416 self._maxdeltachainspan = -1
389 self._maxdeltachainspan = -1
417 self._withsparseread = False
390 self._withsparseread = False
418 self._sparserevlog = False
391 self._sparserevlog = False
419 self._srdensitythreshold = 0.50
392 self._srdensitythreshold = 0.50
420 self._srmingapsize = 262144
393 self._srmingapsize = 262144
421
394
422 # Make copy of flag processors so each revlog instance can support
395 # Make copy of flag processors so each revlog instance can support
423 # custom flags.
396 # custom flags.
424 self._flagprocessors = dict(_flagprocessors)
397 self._flagprocessors = dict(_flagprocessors)
425
398
426 mmapindexthreshold = None
399 mmapindexthreshold = None
427 v = REVLOG_DEFAULT_VERSION
400 v = REVLOG_DEFAULT_VERSION
428 opts = getattr(opener, 'options', None)
401 opts = getattr(opener, 'options', None)
429 if opts is not None:
402 if opts is not None:
430 if 'revlogv2' in opts:
403 if 'revlogv2' in opts:
431 # version 2 revlogs always use generaldelta.
404 # version 2 revlogs always use generaldelta.
432 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
405 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
433 elif 'revlogv1' in opts:
406 elif 'revlogv1' in opts:
434 if 'generaldelta' in opts:
407 if 'generaldelta' in opts:
435 v |= FLAG_GENERALDELTA
408 v |= FLAG_GENERALDELTA
436 else:
409 else:
437 v = 0
410 v = 0
438 if 'chunkcachesize' in opts:
411 if 'chunkcachesize' in opts:
439 self._chunkcachesize = opts['chunkcachesize']
412 self._chunkcachesize = opts['chunkcachesize']
440 if 'maxchainlen' in opts:
413 if 'maxchainlen' in opts:
441 self._maxchainlen = opts['maxchainlen']
414 self._maxchainlen = opts['maxchainlen']
442 if 'deltabothparents' in opts:
415 if 'deltabothparents' in opts:
443 self._deltabothparents = opts['deltabothparents']
416 self._deltabothparents = opts['deltabothparents']
444 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
417 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
445 if 'compengine' in opts:
418 if 'compengine' in opts:
446 self._compengine = opts['compengine']
419 self._compengine = opts['compengine']
447 if 'maxdeltachainspan' in opts:
420 if 'maxdeltachainspan' in opts:
448 self._maxdeltachainspan = opts['maxdeltachainspan']
421 self._maxdeltachainspan = opts['maxdeltachainspan']
449 if mmaplargeindex and 'mmapindexthreshold' in opts:
422 if mmaplargeindex and 'mmapindexthreshold' in opts:
450 mmapindexthreshold = opts['mmapindexthreshold']
423 mmapindexthreshold = opts['mmapindexthreshold']
451 self._sparserevlog = bool(opts.get('sparse-revlog', False))
424 self._sparserevlog = bool(opts.get('sparse-revlog', False))
452 withsparseread = bool(opts.get('with-sparse-read', False))
425 withsparseread = bool(opts.get('with-sparse-read', False))
453 # sparse-revlog forces sparse-read
426 # sparse-revlog forces sparse-read
454 self._withsparseread = self._sparserevlog or withsparseread
427 self._withsparseread = self._sparserevlog or withsparseread
455 if 'sparse-read-density-threshold' in opts:
428 if 'sparse-read-density-threshold' in opts:
456 self._srdensitythreshold = opts['sparse-read-density-threshold']
429 self._srdensitythreshold = opts['sparse-read-density-threshold']
457 if 'sparse-read-min-gap-size' in opts:
430 if 'sparse-read-min-gap-size' in opts:
458 self._srmingapsize = opts['sparse-read-min-gap-size']
431 self._srmingapsize = opts['sparse-read-min-gap-size']
459 if opts.get('enableellipsis'):
432 if opts.get('enableellipsis'):
460 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
433 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
461
434
462 if self._chunkcachesize <= 0:
435 if self._chunkcachesize <= 0:
463 raise error.RevlogError(_('revlog chunk cache size %r is not '
436 raise error.RevlogError(_('revlog chunk cache size %r is not '
464 'greater than 0') % self._chunkcachesize)
437 'greater than 0') % self._chunkcachesize)
465 elif self._chunkcachesize & (self._chunkcachesize - 1):
438 elif self._chunkcachesize & (self._chunkcachesize - 1):
466 raise error.RevlogError(_('revlog chunk cache size %r is not a '
439 raise error.RevlogError(_('revlog chunk cache size %r is not a '
467 'power of 2') % self._chunkcachesize)
440 'power of 2') % self._chunkcachesize)
468
441
469 indexdata = ''
442 indexdata = ''
470 self._initempty = True
443 self._initempty = True
471 try:
444 try:
472 with self._indexfp() as f:
445 with self._indexfp() as f:
473 if (mmapindexthreshold is not None and
446 if (mmapindexthreshold is not None and
474 self.opener.fstat(f).st_size >= mmapindexthreshold):
447 self.opener.fstat(f).st_size >= mmapindexthreshold):
475 indexdata = util.buffer(util.mmapread(f))
448 indexdata = util.buffer(util.mmapread(f))
476 else:
449 else:
477 indexdata = f.read()
450 indexdata = f.read()
478 if len(indexdata) > 0:
451 if len(indexdata) > 0:
479 v = versionformat_unpack(indexdata[:4])[0]
452 v = versionformat_unpack(indexdata[:4])[0]
480 self._initempty = False
453 self._initempty = False
481 except IOError as inst:
454 except IOError as inst:
482 if inst.errno != errno.ENOENT:
455 if inst.errno != errno.ENOENT:
483 raise
456 raise
484
457
485 self.version = v
458 self.version = v
486 self._inline = v & FLAG_INLINE_DATA
459 self._inline = v & FLAG_INLINE_DATA
487 self._generaldelta = v & FLAG_GENERALDELTA
460 self._generaldelta = v & FLAG_GENERALDELTA
488 flags = v & ~0xFFFF
461 flags = v & ~0xFFFF
489 fmt = v & 0xFFFF
462 fmt = v & 0xFFFF
490 if fmt == REVLOGV0:
463 if fmt == REVLOGV0:
491 if flags:
464 if flags:
492 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
493 'revlog %s') %
466 'revlog %s') %
494 (flags >> 16, fmt, self.indexfile))
467 (flags >> 16, fmt, self.indexfile))
495 elif fmt == REVLOGV1:
468 elif fmt == REVLOGV1:
496 if flags & ~REVLOGV1_FLAGS:
469 if flags & ~REVLOGV1_FLAGS:
497 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
470 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
498 'revlog %s') %
471 'revlog %s') %
499 (flags >> 16, fmt, self.indexfile))
472 (flags >> 16, fmt, self.indexfile))
500 elif fmt == REVLOGV2:
473 elif fmt == REVLOGV2:
501 if flags & ~REVLOGV2_FLAGS:
474 if flags & ~REVLOGV2_FLAGS:
502 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
475 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
503 'revlog %s') %
476 'revlog %s') %
504 (flags >> 16, fmt, self.indexfile))
477 (flags >> 16, fmt, self.indexfile))
505 else:
478 else:
506 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
479 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
507 (fmt, self.indexfile))
480 (fmt, self.indexfile))
508
481
509 self._storedeltachains = True
482 self._storedeltachains = True
510
483
511 self._io = revlogio()
484 self._io = revlogio()
512 if self.version == REVLOGV0:
485 if self.version == REVLOGV0:
513 self._io = revlogoldio()
486 self._io = revlogoldio()
514 try:
487 try:
515 d = self._io.parseindex(indexdata, self._inline)
488 d = self._io.parseindex(indexdata, self._inline)
516 except (ValueError, IndexError):
489 except (ValueError, IndexError):
517 raise error.RevlogError(_("index %s is corrupted") %
490 raise error.RevlogError(_("index %s is corrupted") %
518 self.indexfile)
491 self.indexfile)
519 self.index, nodemap, self._chunkcache = d
492 self.index, nodemap, self._chunkcache = d
520 if nodemap is not None:
493 if nodemap is not None:
521 self.nodemap = self._nodecache = nodemap
494 self.nodemap = self._nodecache = nodemap
522 if not self._chunkcache:
495 if not self._chunkcache:
523 self._chunkclear()
496 self._chunkclear()
524 # revnum -> (chain-length, sum-delta-length)
497 # revnum -> (chain-length, sum-delta-length)
525 self._chaininfocache = {}
498 self._chaininfocache = {}
526 # revlog header -> revlog compressor
499 # revlog header -> revlog compressor
527 self._decompressors = {}
500 self._decompressors = {}
528
501
529 @util.propertycache
502 @util.propertycache
530 def _compressor(self):
503 def _compressor(self):
531 return util.compengines[self._compengine].revlogcompressor()
504 return util.compengines[self._compengine].revlogcompressor()
532
505
533 def _indexfp(self, mode='r'):
506 def _indexfp(self, mode='r'):
534 """file object for the revlog's index file"""
507 """file object for the revlog's index file"""
535 args = {r'mode': mode}
508 args = {r'mode': mode}
536 if mode != 'r':
509 if mode != 'r':
537 args[r'checkambig'] = self._checkambig
510 args[r'checkambig'] = self._checkambig
538 if mode == 'w':
511 if mode == 'w':
539 args[r'atomictemp'] = True
512 args[r'atomictemp'] = True
540 return self.opener(self.indexfile, **args)
513 return self.opener(self.indexfile, **args)
541
514
542 def _datafp(self, mode='r'):
515 def _datafp(self, mode='r'):
543 """file object for the revlog's data file"""
516 """file object for the revlog's data file"""
544 return self.opener(self.datafile, mode=mode)
517 return self.opener(self.datafile, mode=mode)
545
518
546 @contextlib.contextmanager
519 @contextlib.contextmanager
547 def _datareadfp(self, existingfp=None):
520 def _datareadfp(self, existingfp=None):
548 """file object suitable to read data"""
521 """file object suitable to read data"""
549 if existingfp is not None:
522 if existingfp is not None:
550 yield existingfp
523 yield existingfp
551 else:
524 else:
552 if self._inline:
525 if self._inline:
553 func = self._indexfp
526 func = self._indexfp
554 else:
527 else:
555 func = self._datafp
528 func = self._datafp
556 with func() as fp:
529 with func() as fp:
557 yield fp
530 yield fp
558
531
559 def tip(self):
532 def tip(self):
560 return self.node(len(self.index) - 1)
533 return self.node(len(self.index) - 1)
561 def __contains__(self, rev):
534 def __contains__(self, rev):
562 return 0 <= rev < len(self)
535 return 0 <= rev < len(self)
563 def __len__(self):
536 def __len__(self):
564 return len(self.index)
537 return len(self.index)
565 def __iter__(self):
538 def __iter__(self):
566 return iter(pycompat.xrange(len(self)))
539 return iter(pycompat.xrange(len(self)))
567 def revs(self, start=0, stop=None):
540 def revs(self, start=0, stop=None):
568 """iterate over all rev in this revlog (from start to stop)"""
541 """iterate over all rev in this revlog (from start to stop)"""
569 step = 1
542 step = 1
570 length = len(self)
543 length = len(self)
571 if stop is not None:
544 if stop is not None:
572 if start > stop:
545 if start > stop:
573 step = -1
546 step = -1
574 stop += step
547 stop += step
575 if stop > length:
548 if stop > length:
576 stop = length
549 stop = length
577 else:
550 else:
578 stop = length
551 stop = length
579 return pycompat.xrange(start, stop, step)
552 return pycompat.xrange(start, stop, step)
580
553
581 @util.propertycache
554 @util.propertycache
582 def nodemap(self):
555 def nodemap(self):
583 if self.index:
556 if self.index:
584 # populate mapping down to the initial node
557 # populate mapping down to the initial node
585 node0 = self.index[0][7] # get around changelog filtering
558 node0 = self.index[0][7] # get around changelog filtering
586 self.rev(node0)
559 self.rev(node0)
587 return self._nodecache
560 return self._nodecache
588
561
589 def hasnode(self, node):
562 def hasnode(self, node):
590 try:
563 try:
591 self.rev(node)
564 self.rev(node)
592 return True
565 return True
593 except KeyError:
566 except KeyError:
594 return False
567 return False
595
568
596 def candelta(self, baserev, rev):
569 def candelta(self, baserev, rev):
597 """whether two revisions (baserev, rev) can be delta-ed or not"""
570 """whether two revisions (baserev, rev) can be delta-ed or not"""
598 # Disable delta if either rev requires a content-changing flag
571 # Disable delta if either rev requires a content-changing flag
599 # processor (ex. LFS). This is because such flag processor can alter
572 # processor (ex. LFS). This is because such flag processor can alter
600 # the rawtext content that the delta will be based on, and two clients
573 # the rawtext content that the delta will be based on, and two clients
601 # could have a same revlog node with different flags (i.e. different
574 # could have a same revlog node with different flags (i.e. different
602 # rawtext contents) and the delta could be incompatible.
575 # rawtext contents) and the delta could be incompatible.
603 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
576 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
604 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
577 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
605 return False
578 return False
606 return True
579 return True
607
580
608 def clearcaches(self):
581 def clearcaches(self):
609 self._cache = None
582 self._cache = None
610 self._chainbasecache.clear()
583 self._chainbasecache.clear()
611 self._chunkcache = (0, '')
584 self._chunkcache = (0, '')
612 self._pcache = {}
585 self._pcache = {}
613
586
614 try:
587 try:
615 self._nodecache.clearcaches()
588 self._nodecache.clearcaches()
616 except AttributeError:
589 except AttributeError:
617 self._nodecache = {nullid: nullrev}
590 self._nodecache = {nullid: nullrev}
618 self._nodepos = None
591 self._nodepos = None
619
592
620 def rev(self, node):
593 def rev(self, node):
621 try:
594 try:
622 return self._nodecache[node]
595 return self._nodecache[node]
623 except TypeError:
596 except TypeError:
624 raise
597 raise
625 except error.RevlogError:
598 except error.RevlogError:
626 # parsers.c radix tree lookup failed
599 # parsers.c radix tree lookup failed
627 if node == wdirid or node in wdirfilenodeids:
600 if node == wdirid or node in wdirfilenodeids:
628 raise error.WdirUnsupported
601 raise error.WdirUnsupported
629 raise error.LookupError(node, self.indexfile, _('no node'))
602 raise error.LookupError(node, self.indexfile, _('no node'))
630 except KeyError:
603 except KeyError:
631 # pure python cache lookup failed
604 # pure python cache lookup failed
632 n = self._nodecache
605 n = self._nodecache
633 i = self.index
606 i = self.index
634 p = self._nodepos
607 p = self._nodepos
635 if p is None:
608 if p is None:
636 p = len(i) - 1
609 p = len(i) - 1
637 else:
610 else:
638 assert p < len(i)
611 assert p < len(i)
639 for r in pycompat.xrange(p, -1, -1):
612 for r in pycompat.xrange(p, -1, -1):
640 v = i[r][7]
613 v = i[r][7]
641 n[v] = r
614 n[v] = r
642 if v == node:
615 if v == node:
643 self._nodepos = r - 1
616 self._nodepos = r - 1
644 return r
617 return r
645 if node == wdirid or node in wdirfilenodeids:
618 if node == wdirid or node in wdirfilenodeids:
646 raise error.WdirUnsupported
619 raise error.WdirUnsupported
647 raise error.LookupError(node, self.indexfile, _('no node'))
620 raise error.LookupError(node, self.indexfile, _('no node'))
648
621
649 # Accessors for index entries.
622 # Accessors for index entries.
650
623
651 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
624 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
652 # are flags.
625 # are flags.
653 def start(self, rev):
626 def start(self, rev):
654 return int(self.index[rev][0] >> 16)
627 return int(self.index[rev][0] >> 16)
655
628
656 def flags(self, rev):
629 def flags(self, rev):
657 return self.index[rev][0] & 0xFFFF
630 return self.index[rev][0] & 0xFFFF
658
631
659 def length(self, rev):
632 def length(self, rev):
660 return self.index[rev][1]
633 return self.index[rev][1]
661
634
662 def rawsize(self, rev):
635 def rawsize(self, rev):
663 """return the length of the uncompressed text for a given revision"""
636 """return the length of the uncompressed text for a given revision"""
664 l = self.index[rev][2]
637 l = self.index[rev][2]
665 if l >= 0:
638 if l >= 0:
666 return l
639 return l
667
640
668 t = self.revision(rev, raw=True)
641 t = self.revision(rev, raw=True)
669 return len(t)
642 return len(t)
670
643
671 def size(self, rev):
644 def size(self, rev):
672 """length of non-raw text (processed by a "read" flag processor)"""
645 """length of non-raw text (processed by a "read" flag processor)"""
673 # fast path: if no "read" flag processor could change the content,
646 # fast path: if no "read" flag processor could change the content,
674 # size is rawsize. note: ELLIPSIS is known to not change the content.
647 # size is rawsize. note: ELLIPSIS is known to not change the content.
675 flags = self.flags(rev)
648 flags = self.flags(rev)
676 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
649 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
677 return self.rawsize(rev)
650 return self.rawsize(rev)
678
651
679 return len(self.revision(rev, raw=False))
652 return len(self.revision(rev, raw=False))
680
653
681 def chainbase(self, rev):
654 def chainbase(self, rev):
682 base = self._chainbasecache.get(rev)
655 base = self._chainbasecache.get(rev)
683 if base is not None:
656 if base is not None:
684 return base
657 return base
685
658
686 index = self.index
659 index = self.index
687 iterrev = rev
660 iterrev = rev
688 base = index[iterrev][3]
661 base = index[iterrev][3]
689 while base != iterrev:
662 while base != iterrev:
690 iterrev = base
663 iterrev = base
691 base = index[iterrev][3]
664 base = index[iterrev][3]
692
665
693 self._chainbasecache[rev] = base
666 self._chainbasecache[rev] = base
694 return base
667 return base
695
668
696 def linkrev(self, rev):
669 def linkrev(self, rev):
697 return self.index[rev][4]
670 return self.index[rev][4]
698
671
699 def parentrevs(self, rev):
672 def parentrevs(self, rev):
700 try:
673 try:
701 entry = self.index[rev]
674 entry = self.index[rev]
702 except IndexError:
675 except IndexError:
703 if rev == wdirrev:
676 if rev == wdirrev:
704 raise error.WdirUnsupported
677 raise error.WdirUnsupported
705 raise
678 raise
706
679
707 return entry[5], entry[6]
680 return entry[5], entry[6]
708
681
709 def node(self, rev):
682 def node(self, rev):
710 try:
683 try:
711 return self.index[rev][7]
684 return self.index[rev][7]
712 except IndexError:
685 except IndexError:
713 if rev == wdirrev:
686 if rev == wdirrev:
714 raise error.WdirUnsupported
687 raise error.WdirUnsupported
715 raise
688 raise
716
689
717 # Derived from index values.
690 # Derived from index values.
718
691
719 def end(self, rev):
692 def end(self, rev):
720 return self.start(rev) + self.length(rev)
693 return self.start(rev) + self.length(rev)
721
694
722 def parents(self, node):
695 def parents(self, node):
723 i = self.index
696 i = self.index
724 d = i[self.rev(node)]
697 d = i[self.rev(node)]
725 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
698 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
726
699
727 def chainlen(self, rev):
700 def chainlen(self, rev):
728 return self._chaininfo(rev)[0]
701 return self._chaininfo(rev)[0]
729
702
730 def _chaininfo(self, rev):
703 def _chaininfo(self, rev):
731 chaininfocache = self._chaininfocache
704 chaininfocache = self._chaininfocache
732 if rev in chaininfocache:
705 if rev in chaininfocache:
733 return chaininfocache[rev]
706 return chaininfocache[rev]
734 index = self.index
707 index = self.index
735 generaldelta = self._generaldelta
708 generaldelta = self._generaldelta
736 iterrev = rev
709 iterrev = rev
737 e = index[iterrev]
710 e = index[iterrev]
738 clen = 0
711 clen = 0
739 compresseddeltalen = 0
712 compresseddeltalen = 0
740 while iterrev != e[3]:
713 while iterrev != e[3]:
741 clen += 1
714 clen += 1
742 compresseddeltalen += e[1]
715 compresseddeltalen += e[1]
743 if generaldelta:
716 if generaldelta:
744 iterrev = e[3]
717 iterrev = e[3]
745 else:
718 else:
746 iterrev -= 1
719 iterrev -= 1
747 if iterrev in chaininfocache:
720 if iterrev in chaininfocache:
748 t = chaininfocache[iterrev]
721 t = chaininfocache[iterrev]
749 clen += t[0]
722 clen += t[0]
750 compresseddeltalen += t[1]
723 compresseddeltalen += t[1]
751 break
724 break
752 e = index[iterrev]
725 e = index[iterrev]
753 else:
726 else:
754 # Add text length of base since decompressing that also takes
727 # Add text length of base since decompressing that also takes
755 # work. For cache hits the length is already included.
728 # work. For cache hits the length is already included.
756 compresseddeltalen += e[1]
729 compresseddeltalen += e[1]
757 r = (clen, compresseddeltalen)
730 r = (clen, compresseddeltalen)
758 chaininfocache[rev] = r
731 chaininfocache[rev] = r
759 return r
732 return r
760
733
761 def _deltachain(self, rev, stoprev=None):
734 def _deltachain(self, rev, stoprev=None):
762 """Obtain the delta chain for a revision.
735 """Obtain the delta chain for a revision.
763
736
764 ``stoprev`` specifies a revision to stop at. If not specified, we
737 ``stoprev`` specifies a revision to stop at. If not specified, we
765 stop at the base of the chain.
738 stop at the base of the chain.
766
739
767 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
740 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
768 revs in ascending order and ``stopped`` is a bool indicating whether
741 revs in ascending order and ``stopped`` is a bool indicating whether
769 ``stoprev`` was hit.
742 ``stoprev`` was hit.
770 """
743 """
771 # Try C implementation.
744 # Try C implementation.
772 try:
745 try:
773 return self.index.deltachain(rev, stoprev, self._generaldelta)
746 return self.index.deltachain(rev, stoprev, self._generaldelta)
774 except AttributeError:
747 except AttributeError:
775 pass
748 pass
776
749
777 chain = []
750 chain = []
778
751
779 # Alias to prevent attribute lookup in tight loop.
752 # Alias to prevent attribute lookup in tight loop.
780 index = self.index
753 index = self.index
781 generaldelta = self._generaldelta
754 generaldelta = self._generaldelta
782
755
783 iterrev = rev
756 iterrev = rev
784 e = index[iterrev]
757 e = index[iterrev]
785 while iterrev != e[3] and iterrev != stoprev:
758 while iterrev != e[3] and iterrev != stoprev:
786 chain.append(iterrev)
759 chain.append(iterrev)
787 if generaldelta:
760 if generaldelta:
788 iterrev = e[3]
761 iterrev = e[3]
789 else:
762 else:
790 iterrev -= 1
763 iterrev -= 1
791 e = index[iterrev]
764 e = index[iterrev]
792
765
793 if iterrev == stoprev:
766 if iterrev == stoprev:
794 stopped = True
767 stopped = True
795 else:
768 else:
796 chain.append(iterrev)
769 chain.append(iterrev)
797 stopped = False
770 stopped = False
798
771
799 chain.reverse()
772 chain.reverse()
800 return chain, stopped
773 return chain, stopped
801
774
802 def ancestors(self, revs, stoprev=0, inclusive=False):
775 def ancestors(self, revs, stoprev=0, inclusive=False):
803 """Generate the ancestors of 'revs' in reverse topological order.
776 """Generate the ancestors of 'revs' in reverse topological order.
804 Does not generate revs lower than stoprev.
777 Does not generate revs lower than stoprev.
805
778
806 See the documentation for ancestor.lazyancestors for more details."""
779 See the documentation for ancestor.lazyancestors for more details."""
807
780
808 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
781 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
809 inclusive=inclusive)
782 inclusive=inclusive)
810
783
811 def descendants(self, revs):
784 def descendants(self, revs):
812 """Generate the descendants of 'revs' in revision order.
785 """Generate the descendants of 'revs' in revision order.
813
786
814 Yield a sequence of revision numbers starting with a child of
787 Yield a sequence of revision numbers starting with a child of
815 some rev in revs, i.e., each revision is *not* considered a
788 some rev in revs, i.e., each revision is *not* considered a
816 descendant of itself. Results are ordered by revision number (a
789 descendant of itself. Results are ordered by revision number (a
817 topological sort)."""
790 topological sort)."""
818 first = min(revs)
791 first = min(revs)
819 if first == nullrev:
792 if first == nullrev:
820 for i in self:
793 for i in self:
821 yield i
794 yield i
822 return
795 return
823
796
824 seen = set(revs)
797 seen = set(revs)
825 for i in self.revs(start=first + 1):
798 for i in self.revs(start=first + 1):
826 for x in self.parentrevs(i):
799 for x in self.parentrevs(i):
827 if x != nullrev and x in seen:
800 if x != nullrev and x in seen:
828 seen.add(i)
801 seen.add(i)
829 yield i
802 yield i
830 break
803 break
831
804
832 def findcommonmissing(self, common=None, heads=None):
805 def findcommonmissing(self, common=None, heads=None):
833 """Return a tuple of the ancestors of common and the ancestors of heads
806 """Return a tuple of the ancestors of common and the ancestors of heads
834 that are not ancestors of common. In revset terminology, we return the
807 that are not ancestors of common. In revset terminology, we return the
835 tuple:
808 tuple:
836
809
837 ::common, (::heads) - (::common)
810 ::common, (::heads) - (::common)
838
811
839 The list is sorted by revision number, meaning it is
812 The list is sorted by revision number, meaning it is
840 topologically sorted.
813 topologically sorted.
841
814
842 'heads' and 'common' are both lists of node IDs. If heads is
815 'heads' and 'common' are both lists of node IDs. If heads is
843 not supplied, uses all of the revlog's heads. If common is not
816 not supplied, uses all of the revlog's heads. If common is not
844 supplied, uses nullid."""
817 supplied, uses nullid."""
845 if common is None:
818 if common is None:
846 common = [nullid]
819 common = [nullid]
847 if heads is None:
820 if heads is None:
848 heads = self.heads()
821 heads = self.heads()
849
822
850 common = [self.rev(n) for n in common]
823 common = [self.rev(n) for n in common]
851 heads = [self.rev(n) for n in heads]
824 heads = [self.rev(n) for n in heads]
852
825
853 # we want the ancestors, but inclusive
826 # we want the ancestors, but inclusive
854 class lazyset(object):
827 class lazyset(object):
855 def __init__(self, lazyvalues):
828 def __init__(self, lazyvalues):
856 self.addedvalues = set()
829 self.addedvalues = set()
857 self.lazyvalues = lazyvalues
830 self.lazyvalues = lazyvalues
858
831
859 def __contains__(self, value):
832 def __contains__(self, value):
860 return value in self.addedvalues or value in self.lazyvalues
833 return value in self.addedvalues or value in self.lazyvalues
861
834
862 def __iter__(self):
835 def __iter__(self):
863 added = self.addedvalues
836 added = self.addedvalues
864 for r in added:
837 for r in added:
865 yield r
838 yield r
866 for r in self.lazyvalues:
839 for r in self.lazyvalues:
867 if not r in added:
840 if not r in added:
868 yield r
841 yield r
869
842
870 def add(self, value):
843 def add(self, value):
871 self.addedvalues.add(value)
844 self.addedvalues.add(value)
872
845
873 def update(self, values):
846 def update(self, values):
874 self.addedvalues.update(values)
847 self.addedvalues.update(values)
875
848
876 has = lazyset(self.ancestors(common))
849 has = lazyset(self.ancestors(common))
877 has.add(nullrev)
850 has.add(nullrev)
878 has.update(common)
851 has.update(common)
879
852
880 # take all ancestors from heads that aren't in has
853 # take all ancestors from heads that aren't in has
881 missing = set()
854 missing = set()
882 visit = collections.deque(r for r in heads if r not in has)
855 visit = collections.deque(r for r in heads if r not in has)
883 while visit:
856 while visit:
884 r = visit.popleft()
857 r = visit.popleft()
885 if r in missing:
858 if r in missing:
886 continue
859 continue
887 else:
860 else:
888 missing.add(r)
861 missing.add(r)
889 for p in self.parentrevs(r):
862 for p in self.parentrevs(r):
890 if p not in has:
863 if p not in has:
891 visit.append(p)
864 visit.append(p)
892 missing = list(missing)
865 missing = list(missing)
893 missing.sort()
866 missing.sort()
894 return has, [self.node(miss) for miss in missing]
867 return has, [self.node(miss) for miss in missing]
895
868
896 def incrementalmissingrevs(self, common=None):
869 def incrementalmissingrevs(self, common=None):
897 """Return an object that can be used to incrementally compute the
870 """Return an object that can be used to incrementally compute the
898 revision numbers of the ancestors of arbitrary sets that are not
871 revision numbers of the ancestors of arbitrary sets that are not
899 ancestors of common. This is an ancestor.incrementalmissingancestors
872 ancestors of common. This is an ancestor.incrementalmissingancestors
900 object.
873 object.
901
874
902 'common' is a list of revision numbers. If common is not supplied, uses
875 'common' is a list of revision numbers. If common is not supplied, uses
903 nullrev.
876 nullrev.
904 """
877 """
905 if common is None:
878 if common is None:
906 common = [nullrev]
879 common = [nullrev]
907
880
908 return ancestor.incrementalmissingancestors(self.parentrevs, common)
881 return ancestor.incrementalmissingancestors(self.parentrevs, common)
909
882
910 def findmissingrevs(self, common=None, heads=None):
883 def findmissingrevs(self, common=None, heads=None):
911 """Return the revision numbers of the ancestors of heads that
884 """Return the revision numbers of the ancestors of heads that
912 are not ancestors of common.
885 are not ancestors of common.
913
886
914 More specifically, return a list of revision numbers corresponding to
887 More specifically, return a list of revision numbers corresponding to
915 nodes N such that every N satisfies the following constraints:
888 nodes N such that every N satisfies the following constraints:
916
889
917 1. N is an ancestor of some node in 'heads'
890 1. N is an ancestor of some node in 'heads'
918 2. N is not an ancestor of any node in 'common'
891 2. N is not an ancestor of any node in 'common'
919
892
920 The list is sorted by revision number, meaning it is
893 The list is sorted by revision number, meaning it is
921 topologically sorted.
894 topologically sorted.
922
895
923 'heads' and 'common' are both lists of revision numbers. If heads is
896 'heads' and 'common' are both lists of revision numbers. If heads is
924 not supplied, uses all of the revlog's heads. If common is not
897 not supplied, uses all of the revlog's heads. If common is not
925 supplied, uses nullid."""
898 supplied, uses nullid."""
926 if common is None:
899 if common is None:
927 common = [nullrev]
900 common = [nullrev]
928 if heads is None:
901 if heads is None:
929 heads = self.headrevs()
902 heads = self.headrevs()
930
903
931 inc = self.incrementalmissingrevs(common=common)
904 inc = self.incrementalmissingrevs(common=common)
932 return inc.missingancestors(heads)
905 return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
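
    # Usage sketch (added commentary; 'rl', 'localhead' and 'remotehead' are
    # hypothetical names): this is the node-level flavor of the query above,
    # e.g. for computing what a peer is missing during discovery:
    #
    #   missing = rl.findmissing(common=[localhead], heads=[remotehead])
    #   # 'missing' is topologically sorted, so every node appears after its
    #   # ancestors and can be applied in order.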

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
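
    # Semantics sketch (added commentary): for a linear graph 0 <- 1 <- 2 <- 3
    # with nodes n0..n3 (hypothetical), nodesbetween(roots=[n1], heads=[n3])
    # would return ([n1, n2, n3], [n1], [n3]): the topological path plus the
    # subsets of roots and heads that are actually connected to it.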

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
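
    # Algorithm sketch (added commentary): the same head-marking idea on a
    # bare list of (p1, p2) parent revisions, independent of any revlog:
    #
    #   parents = [(-1, -1), (0, -1), (1, -1), (1, -1)]  # revs 2 and 3 fork
    #   ishead = [1] * len(parents)          # everybody starts as a head
    #   for r, ps in enumerate(parents):
    #       for p in ps:
    #           if p != -1:
    #               ishead[p] = 0            # a revision with a child is no head
    #   [r for r, v in enumerate(ishead) if v]  # -> [2, 3]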

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
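
    # Usage sketch (added commentary; 'rl', n1 and n2 are hypothetical):
    #
    #   rl.heads()                     # all nodes without children
    #   rl.heads(start=n1)             # only heads descending from n1
    #   rl.heads(start=n1, stop=[n2])  # same, but treat n2 as childless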

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
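
    # Added note: the early 'a > b' return above is sound because revision
    # numbers form a topological order, so an ancestor always has a smaller
    # revision number than its descendant; e.g. isancestorrev(7, 3) can
    # answer False without computing common ancestor heads at all.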

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))
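
    # Usage sketch (added commentary; 'rl' and the values are hypothetical):
    # all of the following resolve to the same node for a revision 5 whose
    # hex node starts with 'abc123':
    #
    #   rl.lookup(5)           # revision number
    #   rl.lookup('5')         # str(revision number)
    #   rl.lookup(binnode)     # 20-byte binary nodeid
    #   rl.lookup('abc123')    # unambiguous hex prefix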

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
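
    # Usage sketch (added commentary; the values are hypothetical): given a
    # node whose hex form starts with 'a1b2c3', shortest() returns the fewest
    # leading hex digits that match no other node (and that cannot be
    # mistaken for the all-'f' working directory id):
    #
    #   rl.shortest(node)               # e.g. -> 'a1b2'
    #   rl.shortest(node, minlength=6)  # never shorter than 6 -> 'a1b2c3'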

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
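
    # Worked example (added commentary; the numbers are hypothetical): with a
    # cache size of 65536, a request for offset=70000, length=1000 rounds the
    # window down to realoffset = 70000 & ~65535 = 65536 and sizes it as
    # reallength = ((70000 + 1000 + 65536) & ~65535) - 65536 = 65536, so a
    # single aligned 64k read covers bytes [65536, 131072) around the request.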

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
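
    # Added note (a sketch, not normative): in an inline revlog the index
    # entries and data chunks are interleaved in a single file, so the data
    # for a revision sits after startrev + 1 index entries. With a
    # hypothetical entry size of 64 bytes, data at logical offset 100 for
    # startrev=2 really lives at 100 + 3 * 64 = 292 bytes into the file.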

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)
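
    # Added note sketching the rule above: a revision is a snapshot when its
    # delta chain does not pass through either of its parents, i.e. it is
    # stored in full (delta parent nullrev) or as a delta on top of another
    # snapshot. E.g. if rev 5 is delta'd against rev 3 while its parents are
    # revs 4 and nullrev, rev 5 is a snapshot exactly when rev 3 is.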

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
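
    # Override sketch (added commentary; the class and scheme are
    # hypothetical, and 'import hashlib' would be needed at module level):
    # a subclass swapping the hash is expected to override checkhash() as
    # well, per the addrevision() docstring below:
    #
    #   class sha256revlog(revlog):
    #       def hash(self, text, p1, p2):
    #           return hashlib.sha256(p2 + p1 + text).digest()[:20]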

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
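
    # Illustrative sketch (added; all names are hypothetical): an entry in
    # self._flagprocessors is a (readtransform, writetransform, rawtransform)
    # triple matching the unpacking above. A toy processor that stores text
    # reversed could look like:
    #
    #   def readreversed(rl, text):
    #       return text[::-1], True    # (transformed text, validatehash)
    #
    #   def writereversed(rl, text):
    #       return text[::-1], True
    #
    #   def rawreversed(rl, text):
    #       return False               # stored rawtext won't match the hash
    #
    #   processor = (readreversed, writereversed, rawreversed)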

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

1803 # the temp file replace the real index when we exit the context
1776 # the temp file replace the real index when we exit the context
1804 # manager
1777 # manager
1805
1778
1806 tr.replace(self.indexfile, trindex * self._io.size)
1779 tr.replace(self.indexfile, trindex * self._io.size)
1807 self._chunkclear()
1780 self._chunkclear()
1808
1781
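    # Illustrative note (not part of the original file): an inline revlog
    # keeps index entries and revision data interleaved in the .i file; once
    # the data outgrows _maxinline (128 KiB at the time of writing, stated
    # here as an assumption), the method above splits it into separate
    # index and data files. A rough sketch of the two layouts:
    #
    #   inline: [entry0][data0][entry1][data1]...           (.i only)
    #   split:  [entry0][entry1]... (.i)   [data0][data1]... (.d)
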
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

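    # Hedged usage sketch (not part of the original file; `rl`, `tr`,
    # `linkrev`, `p1` and `p2` are assumed to be an open revlog, an active
    # transaction, and valid linkrev/parent values):
    #
    #   node = rl.addrevision(b'file contents\n', tr, linkrev, p1, p2)
    #   assert node in rl.nodemap
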
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

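    # Illustrative note (not part of the original file): compress() returns
    # a (header, data) pair. An empty header means the payload is already
    # self-describing (it carries a compressor header such as 'x', or it is
    # raw data that happens to start with '\0'); 'u' explicitly marks
    # uncompressed data. Either way the concatenation should round-trip
    # through decompress():
    #
    #   header, data = rl.compress(text)      # `rl`, `text` hypothetical
    #   assert bytes(rl.decompress(header + data)) == text
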
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

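    # Illustrative dispatch summary (not part of the original file) for the
    # header byte examined above:
    #
    #   'x'   -> zlib-compressed chunk
    #   '\0'  -> raw data stored verbatim (it happens to start with '\0')
    #   'u'   -> raw data prefixed with 'u' so it cannot be misparsed
    #   other -> resolved through the registered compression engines, e.g.
    #            '\x28' for zstd when that engine is available (assumption)
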
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # we need the rawtext size before it is changed by flag
            # processors, which is the non-raw size. use revlog explicitly
            # to avoid filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

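    # Illustrative note (not part of the original file): `data` above is the
    # (header, payload) pair produced by compress(), so what hits disk is,
    # informally:
    #
    #   non-inline: .d gets header+payload, .i gets the packed index entry
    #   inline:     .i gets the packed entry, then header+payload,
    #               interleaved per revision
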
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

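    # Illustrative note (not part of the original file): each entry consumed
    # by addgroup() above is a 7-tuple, unpacked verbatim in the loop:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where `delta` is a binary patch against the revision identified by
    # `deltabase`, and `linknode` is translated to a local linkrev by the
    # supplied `linkmapper` callable.
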
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

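    # Illustrative sketch (not part of the original file): a full-replacement
    # delta, as probed above, is a single patch hunk covering the whole base
    # text. Assuming mdiff.replacediffheader(oldlen, newlen) packs
    # (0, oldlen, newlen) as ">lll" (an assumption inferred from its use
    # here), such a delta looks like:
    #
    #   struct.pack(">lll", 0, oldlen, newlen) + newtext
    #
    # i.e. "replace bytes [0, oldlen) with these newlen bytes", so the
    # 12-byte header is followed directly by the new fulltext, whose first
    # bytes are compared against "\1\ncensored:".
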
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

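    # Worked example (not part of the original file): take a revlog with
    # three revisions where revs 1 and 2 are both children of rev 0, and
    # their linkrevs are [0, 2, 1]. Stripping linkrev 2 must truncate at
    # rev 1 (the first rev whose linkrev >= 2), and rev 2 -- which sits
    # above the strip point but has linkrev 1 -- is reported as broken:
    #
    #   rl.getstrippoint(2)  ->  (1, {2})      # `rl` is hypothetical
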
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Compare on-disk file sizes against what the index expects.

        Returns a (dd, di) pair: extra bytes found in the data file and in
        the index file, respectively (0 when they are consistent).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False):
        if nodesorder not in ('nodes', 'storage', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        frev = self.rev
        fnode = self.node

        if nodesorder == 'nodes':
            revs = [frev(n) for n in nodes]
        elif nodesorder == 'storage':
            revs = sorted(frev(n) for n in nodes)
        else:
            assert self._generaldelta
            revs = set(frev(n) for n in nodes)
            revs = dagop.linearize(revs, self.parentrevs)

        prevrev = None

        if deltaprevious or assumehaveparentrevisions:
            prevrev = self.parentrevs(revs[0])[0]

        # Set of revs available to delta against.
        available = set()

        for rev in revs:
            if rev == nullrev:
                continue

            node = fnode(rev)
            deltaparentrev = self.deltaparent(rev)
            p1rev, p2rev = self.parentrevs(rev)

            # Forced delta against previous mode.
            if deltaprevious:
                baserev = prevrev

            # Revlog is configured to use full snapshots. Stick to that.
            elif not self._storedeltachains:
                baserev = nullrev

            # There is a delta in storage. We try to use that because it
            # amounts to effectively copying data from storage and is
            # therefore the fastest.
            elif deltaparentrev != nullrev:
                # Base revision was already emitted in this group. We can
                # always safely use the delta.
                if deltaparentrev in available:
                    baserev = deltaparentrev

                # Base revision is a parent that hasn't been emitted already.
                # Use it if we can assume the receiver has the parent revision.
                elif (assumehaveparentrevisions
                      and deltaparentrev in (p1rev, p2rev)):
                    baserev = deltaparentrev

                # No guarantee the receiver has the delta parent. Send delta
                # against last revision (if possible), which in the common case
                # should be similar enough to this revision that the delta is
                # reasonable.
                elif prevrev is not None:
                    baserev = prevrev
                else:
                    baserev = nullrev

            # Storage has a fulltext revision.

            # Let's use the previous revision, which is as good a guess as any.
            # There is definitely room to improve this logic.
            elif prevrev is not None:
                baserev = prevrev
            else:
                baserev = nullrev

            # But we can't actually use our chosen delta base for whatever
            # reason. Reset to fulltext.
            if baserev != nullrev and not self.candelta(baserev, rev):
                baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if revisiondata:
                if self.iscensored(baserev) or self.iscensored(rev):
                    try:
                        revision = self.revision(node, raw=True)
                    except error.CensoredNodeError as e:
                        revision = e.tombstone

                    if baserev != nullrev:
                        baserevisionsize = self.rawsize(baserev)

                elif baserev == nullrev and not deltaprevious:
                    revision = self.revision(node, raw=True)
                    available.add(rev)
                else:
                    delta = self.revdiff(baserev, rev)
                    available.add(rev)

            yield revlogrevisiondelta(
                node=node,
                p1node=fnode(p1rev),
                p2node=fnode(p2rev),
                basenode=fnode(baserev),
                flags=self.flags(rev),
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev

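    # Hedged usage sketch (not part of the original file): a consumer
    # typically iterates the emitted objects and stores either the fulltext
    # or the delta, whichever was populated. `rl`, `store_fulltext` and
    # `store_delta` are hypothetical:
    #
    #   for rd in rl.emitrevisions(nodes, revisiondata=True):
    #       if rd.revision is not None:
    #           store_fulltext(rd.node, rd.revision)
    #       elif rd.delta is not None:
    #           store_delta(rd.node, rd.basenode, rd.delta)
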
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions are re-added through the full ``addrevision()`` path, so
          flag processing and delta computation are redone.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        By default (``None``), the destination revlog's existing setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

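    # Hedged usage sketch (not part of the original file): recomputing every
    # delta while copying into a fresh revlog, with `src`, `dst` and `tr`
    # assumed to be the source revlog, an empty destination revlog, and an
    # active transaction:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
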
    def censorrevision(self, node, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        rev = self.rev(node)
        tombstone = packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(rev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Using two files instead of one makes it easy to rewrite entry-by-entry
        idxread = self.opener(self.indexfile, 'r')
        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
        if self.version & FLAG_INLINE_DATA:
            dataread, datawrite = idxread, idxwrite
        else:
            dataread = self.opener(self.datafile, 'r')
            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

        # Copy all revlog data up to the entry to be censored.
        offset = self.start(rev)

        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
            flags = gettype(offlags)
            if newdata:
                comp, uncomp, nflags = newdata
                flags |= nflags
            offlags = offset_type(newoffs, flags)
            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
            idxwrite.write(self._io.packentry(e, None, self.version, r))
            idxread.seek(self._io.size, 1)

        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
            """Write the given fulltext with the given data offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = self.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            dataread.seek(self.length(r), 1)
            return newcomp

        # Rewrite censored entry with (padded) tombstone data.
        pad = ' ' * (self.rawsize(rev) - len(tombstone))
        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

        # Rewrite all following filelog revisions fixing up offsets and deltas.
        for srev in pycompat.xrange(rev + 1, len(self)):
            if rev in self.parentrevs(srev):
                # Immediate children of censored node must be re-added as
                # fulltext.
                try:
                    revdata = self.revision(srev)
                except error.CensoredNodeError as e:
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = self.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen

        idxread.close()
        idxwrite.close()
        if dataread is not idxread:
            dataread.close()
        datawrite.close()

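    # Illustrative note (not part of the original file): the tombstone
    # written above reuses the filelog metadata framing produced by
    # packmeta(), so the stored rawtext of a censored revision looks roughly
    # like (assuming the usual "\1\n"-delimited metadata block):
    #
    #   "\1\ncensored: <tombstone>\n\1\n" + padding
    #
    # which is exactly the "\1\ncensored:" prefix that iscensored() /
    # _peek_iscensored() probe for.
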
2629 def verifyintegrity(self, state):
2602 def verifyintegrity(self, state):
2630 """Verifies the integrity of the revlog.
2603 """Verifies the integrity of the revlog.
2631
2604
2632 Yields ``revlogproblem`` instances describing problems that are
2605 Yields ``revlogproblem`` instances describing problems that are
2633 found.
2606 found.
2634 """
2607 """
2635 dd, di = self.checksize()
2608 dd, di = self.checksize()
2636 if dd:
2609 if dd:
2637 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2610 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2638 if di:
2611 if di:
2639 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2612 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2640
2613
2641 version = self.version & 0xFFFF
2614 version = self.version & 0xFFFF
2642
2615
2643 # The verifier tells us what version revlog we should be.
2616 # The verifier tells us what version revlog we should be.
2644 if version != state['expectedversion']:
2617 if version != state['expectedversion']:
2645 yield revlogproblem(
2618 yield revlogproblem(
2646 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2619 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2647 (self.indexfile, version, state['expectedversion']))
2620 (self.indexfile, version, state['expectedversion']))
2648
2621
2649 state['skipread'] = set()
2622 state['skipread'] = set()
2650
2623
2651 for rev in self:
2624 for rev in self:
2652 node = self.node(rev)
2625 node = self.node(rev)
2653
2626
2654 # Verify contents. 4 cases to care about:
2627 # Verify contents. 4 cases to care about:
2655 #
2628 #
2656 # common: the most common case
2629 # common: the most common case
2657 # rename: with a rename
2630 # rename: with a rename
2658 # meta: file content starts with b'\1\n', the metadata
2631 # meta: file content starts with b'\1\n', the metadata
2659 # header defined in filelog.py, but without a rename
2632 # header defined in filelog.py, but without a rename
2660 # ext: content stored externally
2633 # ext: content stored externally
2661 #
2634 #
2662 # More formally, their differences are shown below:
2635 # More formally, their differences are shown below:
2663 #
2636 #
2664 # | common | rename | meta | ext
2637 # | common | rename | meta | ext
2665 # -------------------------------------------------------
2638 # -------------------------------------------------------
2666 # flags() | 0 | 0 | 0 | not 0
2639 # flags() | 0 | 0 | 0 | not 0
2667 # renamed() | False | True | False | ?
2640 # renamed() | False | True | False | ?
2668 # rawtext[0:2]=='\1\n'| False | True | True | ?
2641 # rawtext[0:2]=='\1\n'| False | True | True | ?
2669 #
2642 #
2670 # "rawtext" means the raw text stored in revlog data, which
2643 # "rawtext" means the raw text stored in revlog data, which
2671 # could be retrieved by "revision(rev, raw=True)". "text"
2644 # could be retrieved by "revision(rev, raw=True)". "text"
2672 # mentioned below is "revision(rev, raw=False)".
2645 # mentioned below is "revision(rev, raw=False)".
2673 #
2646 #
2674 # There are 3 different lengths stored physically:
2647 # There are 3 different lengths stored physically:
2675 # 1. L1: rawsize, stored in revlog index
2648 # 1. L1: rawsize, stored in revlog index
2676 # 2. L2: len(rawtext), stored in revlog data
2649 # 2. L2: len(rawtext), stored in revlog data
2677 # 3. L3: len(text), stored in revlog data if flags==0, or
2650 # 3. L3: len(text), stored in revlog data if flags==0, or
2678 # possibly somewhere else if flags!=0
2651 # possibly somewhere else if flags!=0
2679 #
2652 #
2680 # L1 should be equal to L2. L3 could be different from them.
2653 # L1 should be equal to L2. L3 could be different from them.
2681 # "text" may or may not affect commit hash depending on flag
2654 # "text" may or may not affect commit hash depending on flag
2682 # processors (see revlog.addflagprocessor).
2655 # processors (see revlog.addflagprocessor).
2683 #
2656 #
2684 # | common | rename | meta | ext
2657 # | common | rename | meta | ext
2685 # -------------------------------------------------
2658 # -------------------------------------------------
2686 # rawsize() | L1 | L1 | L1 | L1
2659 # rawsize() | L1 | L1 | L1 | L1
2687 # size() | L1 | L2-LM | L1(*) | L1 (?)
2660 # size() | L1 | L2-LM | L1(*) | L1 (?)
2688 # len(rawtext) | L2 | L2 | L2 | L2
2661 # len(rawtext) | L2 | L2 | L2 | L2
2689 # len(text) | L2 | L2 | L2 | L3
2662 # len(text) | L2 | L2 | L2 | L3
2690 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2663 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2691 #
2664 #
2692 # LM: length of metadata, depending on rawtext
2665 # LM: length of metadata, depending on rawtext
2693 # (*): not ideal, see comment in filelog.size
2666 # (*): not ideal, see comment in filelog.size
2694 # (?): could be "- len(meta)" if the resolved content has
2667 # (?): could be "- len(meta)" if the resolved content has
2695 # rename metadata
2668 # rename metadata
2696 #
2669 #
2697 # Checks that need to be done:
2670 # Checks that need to be done:
2698 # 1. length check: L1 == L2, in all cases.
2671 # 1. length check: L1 == L2, in all cases.
2699 # 2. hash check: depending on flag processor, we may need to
2672 # 2. hash check: depending on flag processor, we may need to
2700 # use either "text" (external), or "rawtext" (in revlog).
2673 # use either "text" (external), or "rawtext" (in revlog).
2701
2674
2702 try:
2675 try:
2703 skipflags = state.get('skipflags', 0)
2676 skipflags = state.get('skipflags', 0)
2704 if skipflags:
2677 if skipflags:
2705 skipflags &= self.flags(rev)
2678 skipflags &= self.flags(rev)
2706
2679
2707 if skipflags:
2680 if skipflags:
2708 state['skipread'].add(node)
2681 state['skipread'].add(node)
2709 else:
2682 else:
2710 # Side-effect: read content and verify hash.
2683 # Side-effect: read content and verify hash.
2711 self.revision(node)
2684 self.revision(node)
2712
2685
2713 l1 = self.rawsize(rev)
2686 l1 = self.rawsize(rev)
2714 l2 = len(self.revision(node, raw=True))
2687 l2 = len(self.revision(node, raw=True))
2715
2688
2716 if l1 != l2:
2689 if l1 != l2:
2717 yield revlogproblem(
2690 yield revlogproblem(
2718 error=_('unpacked size is %d, %d expected') % (l2, l1),
2691 error=_('unpacked size is %d, %d expected') % (l2, l1),
2719 node=node)
2692 node=node)
2720
2693
2721 except error.CensoredNodeError:
2694 except error.CensoredNodeError:
2722 if state['erroroncensored']:
2695 if state['erroroncensored']:
2723 yield revlogproblem(error=_('censored file data'),
2696 yield revlogproblem(error=_('censored file data'),
2724 node=node)
2697 node=node)
2725 state['skipread'].add(node)
2698 state['skipread'].add(node)
2726 except Exception as e:
2699 except Exception as e:
2727 yield revlogproblem(
2700 yield revlogproblem(
2728 error=_('unpacking %s: %s') % (short(node), e),
2701 error=_('unpacking %s: %s') % (short(node), e),
2729 node=node)
2702 node=node)
2730 state['skipread'].add(node)
2703 state['skipread'].add(node)
2731
2704
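A hedged sketch of how a caller might drive verifyintegrity(); the state keys mirror the ones the method reads above, rl stands for any revlog instance, and the real driver lives in verify.py:

def reportproblems(rl, ui):
    state = {
        'expectedversion': 1,     # REVLOGV1; supplied by the verifier
        'erroroncensored': True,  # treat censored data as an error
        'skipflags': 0,           # do not bypass hash checks
    }
    for problem in rl.verifyintegrity(state):
        if problem.error:
            ui.write('error: %s\n' % problem.error)
        elif problem.warning:
            ui.write('%s\n' % problem.warning)
    # Nodes whose content could not be fully validated:
    return state['skipread']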
2732 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2705 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2733 revisionscount=False, trackedsize=False,
2706 revisionscount=False, trackedsize=False,
2734 storedsize=False):
2707 storedsize=False):
2735 d = {}
2708 d = {}
2736
2709
2737 if exclusivefiles:
2710 if exclusivefiles:
2738 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2711 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2739 if not self._inline:
2712 if not self._inline:
2740 d['exclusivefiles'].append((self.opener, self.datafile))
2713 d['exclusivefiles'].append((self.opener, self.datafile))
2741
2714
2742 if sharedfiles:
2715 if sharedfiles:
2743 d['sharedfiles'] = []
2716 d['sharedfiles'] = []
2744
2717
2745 if revisionscount:
2718 if revisionscount:
2746 d['revisionscount'] = len(self)
2719 d['revisionscount'] = len(self)
2747
2720
2748 if trackedsize:
2721 if trackedsize:
2749 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2722 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2750
2723
2751 if storedsize:
2724 if storedsize:
2752 d['storedsize'] = sum(self.opener.stat(path).st_size
2725 d['storedsize'] = sum(self.opener.stat(path).st_size
2753 for path in self.files())
2726 for path in self.files())
2754
2727
2755 return d
2728 return d
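A sketch of querying the storage metadata assembled above. Only the requested keys are computed, which matters because trackedsize sums rawsize over every revision and storedsize stats every file:

def summarize(rl):
    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    return '%d revisions, %d tracked bytes' % (
        info['revisionscount'], info['trackedsize'])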
@@ -1,673 +1,674
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # To use this with the test suite:
8 # To use this with the test suite:
9 #
9 #
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12
12
13 from __future__ import absolute_import
13 from __future__ import absolute_import
14
14
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import (
18 from mercurial.node import (
19 bin,
19 bin,
20 hex,
20 hex,
21 nullid,
21 nullid,
22 nullrev,
22 nullrev,
23 )
23 )
24 from mercurial.thirdparty import (
24 from mercurial.thirdparty import (
25 attr,
25 attr,
26 cbor,
26 cbor,
27 )
27 )
28 from mercurial import (
28 from mercurial import (
29 ancestor,
29 ancestor,
30 bundlerepo,
30 bundlerepo,
31 error,
31 error,
32 extensions,
32 extensions,
33 localrepo,
33 localrepo,
34 mdiff,
34 mdiff,
35 pycompat,
35 pycompat,
36 repository,
36 repository,
37 revlog,
37 revlog,
38 store,
38 store,
39 verify,
39 verify,
40 )
40 )
41 from mercurial.utils import (
41 from mercurial.utils import (
42 interfaceutil,
42 interfaceutil,
43 storageutil,
43 )
44 )
44
45
45 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
46 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
46 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
47 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
47 # be specifying the version(s) of Mercurial they are tested with, or
48 # be specifying the version(s) of Mercurial they are tested with, or
48 # leave the attribute unspecified.
49 # leave the attribute unspecified.
49 testedwith = 'ships-with-hg-core'
50 testedwith = 'ships-with-hg-core'
50
51
51 REQUIREMENT = 'testonly-simplestore'
52 REQUIREMENT = 'testonly-simplestore'
52
53
53 def validatenode(node):
54 def validatenode(node):
54 if isinstance(node, int):
55 if isinstance(node, int):
55 raise ValueError('expected node; got int')
56 raise ValueError('expected node; got int')
56
57
57 if len(node) != 20:
58 if len(node) != 20:
58 raise ValueError('expected 20 byte node')
59 raise ValueError('expected 20 byte node')
59
60
60 def validaterev(rev):
61 def validaterev(rev):
61 if not isinstance(rev, int):
62 if not isinstance(rev, int):
62 raise ValueError('expected int')
63 raise ValueError('expected int')
63
64
64 class simplestoreerror(error.StorageError):
65 class simplestoreerror(error.StorageError):
65 pass
66 pass
66
67
67 @interfaceutil.implementer(repository.irevisiondelta)
68 @interfaceutil.implementer(repository.irevisiondelta)
68 @attr.s(slots=True, frozen=True)
69 @attr.s(slots=True, frozen=True)
69 class simplestorerevisiondelta(object):
70 class simplestorerevisiondelta(object):
70 node = attr.ib()
71 node = attr.ib()
71 p1node = attr.ib()
72 p1node = attr.ib()
72 p2node = attr.ib()
73 p2node = attr.ib()
73 basenode = attr.ib()
74 basenode = attr.ib()
74 linknode = attr.ib()
75 linknode = attr.ib()
75 flags = attr.ib()
76 flags = attr.ib()
76 baserevisionsize = attr.ib()
77 baserevisionsize = attr.ib()
77 revision = attr.ib()
78 revision = attr.ib()
78 delta = attr.ib()
79 delta = attr.ib()
79
80
80 @interfaceutil.implementer(repository.ifilestorage)
81 @interfaceutil.implementer(repository.ifilestorage)
81 class filestorage(object):
82 class filestorage(object):
82 """Implements storage for a tracked path.
83 """Implements storage for a tracked path.
83
84
84 Data is stored in the VFS in a directory corresponding to the tracked
85 Data is stored in the VFS in a directory corresponding to the tracked
85 path.
86 path.
86
87
87 Index data is stored in an ``index`` file using CBOR.
88 Index data is stored in an ``index`` file using CBOR.
88
89
89 Fulltext data is stored in files named after the hex node.
90 Fulltext data is stored in files named after the hex node.
90 """
91 """
91
92
92 def __init__(self, svfs, path):
93 def __init__(self, svfs, path):
93 self._svfs = svfs
94 self._svfs = svfs
94 self._path = path
95 self._path = path
95
96
96 self._storepath = b'/'.join([b'data', path])
97 self._storepath = b'/'.join([b'data', path])
97 self._indexpath = b'/'.join([self._storepath, b'index'])
98 self._indexpath = b'/'.join([self._storepath, b'index'])
98
99
99 indexdata = self._svfs.tryread(self._indexpath)
100 indexdata = self._svfs.tryread(self._indexpath)
100 if indexdata:
101 if indexdata:
101 indexdata = cbor.loads(indexdata)
102 indexdata = cbor.loads(indexdata)
102
103
103 self._indexdata = indexdata or []
104 self._indexdata = indexdata or []
104 self._indexbynode = {}
105 self._indexbynode = {}
105 self._indexbyrev = {}
106 self._indexbyrev = {}
106 self._index = []
107 self._index = []
107 self._refreshindex()
108 self._refreshindex()
108
109
109 def _refreshindex(self):
110 def _refreshindex(self):
110 self._indexbynode.clear()
111 self._indexbynode.clear()
111 self._indexbyrev.clear()
112 self._indexbyrev.clear()
112 self._index = []
113 self._index = []
113
114
114 for i, entry in enumerate(self._indexdata):
115 for i, entry in enumerate(self._indexdata):
115 self._indexbynode[entry[b'node']] = entry
116 self._indexbynode[entry[b'node']] = entry
116 self._indexbyrev[i] = entry
117 self._indexbyrev[i] = entry
117
118
118 self._indexbynode[nullid] = {
119 self._indexbynode[nullid] = {
119 b'node': nullid,
120 b'node': nullid,
120 b'p1': nullid,
121 b'p1': nullid,
121 b'p2': nullid,
122 b'p2': nullid,
122 b'linkrev': nullrev,
123 b'linkrev': nullrev,
123 b'flags': 0,
124 b'flags': 0,
124 }
125 }
125
126
126 self._indexbyrev[nullrev] = {
127 self._indexbyrev[nullrev] = {
127 b'node': nullid,
128 b'node': nullid,
128 b'p1': nullid,
129 b'p1': nullid,
129 b'p2': nullid,
130 b'p2': nullid,
130 b'linkrev': nullrev,
131 b'linkrev': nullrev,
131 b'flags': 0,
132 b'flags': 0,
132 }
133 }
133
134
134 for i, entry in enumerate(self._indexdata):
135 for i, entry in enumerate(self._indexdata):
135 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
136 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
136
137
137 # start, length, rawsize, chainbase, linkrev, p1, p2, node
138 # start, length, rawsize, chainbase, linkrev, p1, p2, node
138 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
139 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
139 entry[b'node']))
140 entry[b'node']))
140
141
141 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
142 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
142
143
143 def __len__(self):
144 def __len__(self):
144 return len(self._indexdata)
145 return len(self._indexdata)
145
146
146 def __iter__(self):
147 def __iter__(self):
147 return iter(range(len(self)))
148 return iter(range(len(self)))
148
149
149 def revs(self, start=0, stop=None):
150 def revs(self, start=0, stop=None):
150 step = 1
151 step = 1
151 if stop is not None:
152 if stop is not None:
152 if start > stop:
153 if start > stop:
153 step = -1
154 step = -1
154
155
155 stop += step
156 stop += step
156 else:
157 else:
157 stop = len(self)
158 stop = len(self)
158
159
159 return range(start, stop, step)
160 return range(start, stop, step)
160
161
161 def parents(self, node):
162 def parents(self, node):
162 validatenode(node)
163 validatenode(node)
163
164
164 if node not in self._indexbynode:
165 if node not in self._indexbynode:
165 raise KeyError('unknown node')
166 raise KeyError('unknown node')
166
167
167 entry = self._indexbynode[node]
168 entry = self._indexbynode[node]
168
169
169 return entry[b'p1'], entry[b'p2']
170 return entry[b'p1'], entry[b'p2']
170
171
171 def parentrevs(self, rev):
172 def parentrevs(self, rev):
172 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
173 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
173 return self.rev(p1), self.rev(p2)
174 return self.rev(p1), self.rev(p2)
174
175
175 def rev(self, node):
176 def rev(self, node):
176 validatenode(node)
177 validatenode(node)
177
178
178 try:
179 try:
179 self._indexbynode[node]
180 self._indexbynode[node]
180 except KeyError:
181 except KeyError:
181 raise error.LookupError(node, self._indexpath, _('no node'))
182 raise error.LookupError(node, self._indexpath, _('no node'))
182
183
183 for rev, entry in self._indexbyrev.items():
184 for rev, entry in self._indexbyrev.items():
184 if entry[b'node'] == node:
185 if entry[b'node'] == node:
185 return rev
186 return rev
186
187
187 raise error.ProgrammingError('this should not occur')
188 raise error.ProgrammingError('this should not occur')
188
189
189 def node(self, rev):
190 def node(self, rev):
190 validaterev(rev)
191 validaterev(rev)
191
192
192 return self._indexbyrev[rev][b'node']
193 return self._indexbyrev[rev][b'node']
193
194
194 def lookup(self, node):
195 def lookup(self, node):
195 if isinstance(node, int):
196 if isinstance(node, int):
196 return self.node(node)
197 return self.node(node)
197
198
198 if len(node) == 20:
199 if len(node) == 20:
199 self.rev(node)
200 self.rev(node)
200 return node
201 return node
201
202
202 try:
203 try:
203 rev = int(node)
204 rev = int(node)
204 if '%d' % rev != node:
205 if '%d' % rev != node:
205 raise ValueError
206 raise ValueError
206
207
207 if rev < 0:
208 if rev < 0:
208 rev = len(self) + rev
209 rev = len(self) + rev
209 if rev < 0 or rev >= len(self):
210 if rev < 0 or rev >= len(self):
210 raise ValueError
211 raise ValueError
211
212
212 return self.node(rev)
213 return self.node(rev)
213 except (ValueError, OverflowError):
214 except (ValueError, OverflowError):
214 pass
215 pass
215
216
216 if len(node) == 40:
217 if len(node) == 40:
217 try:
218 try:
218 rawnode = bin(node)
219 rawnode = bin(node)
219 self.rev(rawnode)
220 self.rev(rawnode)
220 return rawnode
221 return rawnode
221 except TypeError:
222 except TypeError:
222 pass
223 pass
223
224
224 raise error.LookupError(node, self._path, _('invalid lookup input'))
225 raise error.LookupError(node, self._path, _('invalid lookup input'))
225
226
226 def linkrev(self, rev):
227 def linkrev(self, rev):
227 validaterev(rev)
228 validaterev(rev)
228
229
229 return self._indexbyrev[rev][b'linkrev']
230 return self._indexbyrev[rev][b'linkrev']
230
231
231 def _flags(self, rev):
232 def _flags(self, rev):
232 validaterev(rev)
233 validaterev(rev)
233
234
234 return self._indexbyrev[rev][b'flags']
235 return self._indexbyrev[rev][b'flags']
235
236
236 def _candelta(self, baserev, rev):
237 def _candelta(self, baserev, rev):
237 validaterev(baserev)
238 validaterev(baserev)
238 validaterev(rev)
239 validaterev(rev)
239
240
240 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
241 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
241 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
242 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
242 return False
243 return False
243
244
244 return True
245 return True
245
246
246 def _processflags(self, text, flags, operation, raw=False):
247 def _processflags(self, text, flags, operation, raw=False):
247 if flags == 0:
248 if flags == 0:
248 return text, True
249 return text, True
249
250
250 if flags & ~revlog.REVIDX_KNOWN_FLAGS:
251 if flags & ~revlog.REVIDX_KNOWN_FLAGS:
251 raise simplestoreerror(_("incompatible revision flag '%#x'") %
252 raise simplestoreerror(_("incompatible revision flag '%#x'") %
252 (flags & ~revlog.REVIDX_KNOWN_FLAGS))
253 (flags & ~revlog.REVIDX_KNOWN_FLAGS))
253
254
254 validatehash = True
255 validatehash = True
255 # Depending on the operation (read or write), the order might be
256 # Depending on the operation (read or write), the order might be
256 # reversed due to non-commutative transforms.
257 # reversed due to non-commutative transforms.
257 orderedflags = revlog.REVIDX_FLAGS_ORDER
258 orderedflags = revlog.REVIDX_FLAGS_ORDER
258 if operation == 'write':
259 if operation == 'write':
259 orderedflags = reversed(orderedflags)
260 orderedflags = reversed(orderedflags)
260
261
261 for flag in orderedflags:
262 for flag in orderedflags:
262 # If a flagprocessor has been registered for a known flag, apply the
263 # If a flagprocessor has been registered for a known flag, apply the
263 # related operation transform and update result tuple.
264 # related operation transform and update result tuple.
264 if flag & flags:
265 if flag & flags:
265 vhash = True
266 vhash = True
266
267
267 if flag not in revlog._flagprocessors:
268 if flag not in revlog._flagprocessors:
268 message = _("missing processor for flag '%#x'") % (flag)
269 message = _("missing processor for flag '%#x'") % (flag)
269 raise simplestoreerror(message)
270 raise simplestoreerror(message)
270
271
271 processor = revlog._flagprocessors[flag]
272 processor = revlog._flagprocessors[flag]
272 if processor is not None:
273 if processor is not None:
273 readtransform, writetransform, rawtransform = processor
274 readtransform, writetransform, rawtransform = processor
274
275
275 if raw:
276 if raw:
276 vhash = rawtransform(self, text)
277 vhash = rawtransform(self, text)
277 elif operation == 'read':
278 elif operation == 'read':
278 text, vhash = readtransform(self, text)
279 text, vhash = readtransform(self, text)
279 else: # write operation
280 else: # write operation
280 text, vhash = writetransform(self, text)
281 text, vhash = writetransform(self, text)
281 validatehash = validatehash and vhash
282 validatehash = validatehash and vhash
282
283
283 return text, validatehash
284 return text, validatehash
284
285
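A sketch of the shape _processflags() expects for a registered processor: a (read, write, raw) triple, where the read/write transforms return (newtext, validatehash) and the raw transform returns whether the rawtext hash may be checked. These no-op transforms are illustrative only:

def _noopread(store, text):
    return text, True

def _noopwrite(store, text):
    return text, True

def _rawvalid(store, text):
    return True

noopprocessor = (_noopread, _noopwrite, _rawvalid)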
285 def checkhash(self, text, node, p1=None, p2=None, rev=None):
286 def checkhash(self, text, node, p1=None, p2=None, rev=None):
286 if p1 is None and p2 is None:
287 if p1 is None and p2 is None:
287 p1, p2 = self.parents(node)
288 p1, p2 = self.parents(node)
288 if node != revlog.hash(text, p1, p2):
289 if node != storageutil.hashrevisionsha1(text, p1, p2):
289 raise simplestoreerror(_("integrity check failed on %s") %
290 raise simplestoreerror(_("integrity check failed on %s") %
290 self._path)
291 self._path)
291
292
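A sketch of precomputing the node that checkhash() above verifies: storageutil.hashrevisionsha1 folds both parent nodes into the SHA-1, so identical texts with different history get different nodes.

from mercurial.node import nullid
from mercurial.utils import storageutil

text = b'file contents\n'
root = storageutil.hashrevisionsha1(text, nullid, nullid)
child = storageutil.hashrevisionsha1(text, root, nullid)
assert root != child        # same text, different parents
assert len(root) == 20      # raw SHA-1 digest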
292 def revision(self, node, raw=False):
293 def revision(self, node, raw=False):
293 validatenode(node)
294 validatenode(node)
294
295
295 if node == nullid:
296 if node == nullid:
296 return b''
297 return b''
297
298
298 rev = self.rev(node)
299 rev = self.rev(node)
299 flags = self._flags(rev)
300 flags = self._flags(rev)
300
301
301 path = b'/'.join([self._storepath, hex(node)])
302 path = b'/'.join([self._storepath, hex(node)])
302 rawtext = self._svfs.read(path)
303 rawtext = self._svfs.read(path)
303
304
304 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
305 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
305 if validatehash:
306 if validatehash:
306 self.checkhash(text, node, rev=rev)
307 self.checkhash(text, node, rev=rev)
307
308
308 return text
309 return text
309
310
310 def read(self, node):
311 def read(self, node):
311 validatenode(node)
312 validatenode(node)
312
313
313 revision = self.revision(node)
314 revision = self.revision(node)
314
315
315 if not revision.startswith(b'\1\n'):
316 if not revision.startswith(b'\1\n'):
316 return revision
317 return revision
317
318
318 start = revision.index(b'\1\n', 2)
319 start = revision.index(b'\1\n', 2)
319 return revision[start + 2:]
320 return revision[start + 2:]
320
321
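read() above strips the filelog metadata header: a block wrapped between two b'\1\n' markers at the start of the fulltext. A standalone sketch of that framing (wrapmeta/unwrapmeta are hypothetical helpers, not Mercurial APIs):

def wrapmeta(metablob, text):
    return b'\1\n' + metablob + b'\1\n' + text

def unwrapmeta(revision):
    if not revision.startswith(b'\1\n'):
        return b'', revision
    end = revision.index(b'\1\n', 2)
    return revision[2:end], revision[end + 2:]

meta, text = unwrapmeta(wrapmeta(b'copy: a\n', b'data'))
assert (meta, text) == (b'copy: a\n', b'data')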
321 def renamed(self, node):
322 def renamed(self, node):
322 validatenode(node)
323 validatenode(node)
323
324
324 if self.parents(node)[0] != nullid:
325 if self.parents(node)[0] != nullid:
325 return False
326 return False
326
327
327 fulltext = self.revision(node)
328 fulltext = self.revision(node)
328 m = revlog.parsemeta(fulltext)[0]
329 m = revlog.parsemeta(fulltext)[0]
329
330
330 if m and 'copy' in m:
331 if m and 'copy' in m:
331 return m['copy'], bin(m['copyrev'])
332 return m['copy'], bin(m['copyrev'])
332
333
333 return False
334 return False
334
335
335 def cmp(self, node, text):
336 def cmp(self, node, text):
336 validatenode(node)
337 validatenode(node)
337
338
338 t = text
339 t = text
339
340
340 if text.startswith(b'\1\n'):
341 if text.startswith(b'\1\n'):
341 t = b'\1\n\1\n' + text
342 t = b'\1\n\1\n' + text
342
343
343 p1, p2 = self.parents(node)
344 p1, p2 = self.parents(node)
344
345
345 if revlog.hash(t, p1, p2) == node:
346 if storageutil.hashrevisionsha1(t, p1, p2) == node:
346 return False
347 return False
347
348
348 if self.iscensored(self.rev(node)):
349 if self.iscensored(self.rev(node)):
349 return text != b''
350 return text != b''
350
351
351 if self.renamed(node):
352 if self.renamed(node):
352 t2 = self.read(node)
353 t2 = self.read(node)
353 return t2 != text
354 return t2 != text
354
355
355 return True
356 return True
356
357
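The b'\1\n\1\n' prefix in cmp() above mirrors how add() stores texts that themselves begin with the metadata marker: they get wrapped in an empty metadata header so the stored bytes stay unambiguous. A sketch of that rule:

def storedtext(text):
    # Texts starting with the metadata marker are stored with an
    # empty metadata header prepended, exactly as cmp() anticipates.
    if text.startswith(b'\1\n'):
        return b'\1\n\1\n' + text
    return text

assert storedtext(b'\1\nlooks like meta').startswith(b'\1\n\1\n')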
357 def size(self, rev):
358 def size(self, rev):
358 validaterev(rev)
359 validaterev(rev)
359
360
360 node = self._indexbyrev[rev][b'node']
361 node = self._indexbyrev[rev][b'node']
361
362
362 if self.renamed(node):
363 if self.renamed(node):
363 return len(self.read(node))
364 return len(self.read(node))
364
365
365 if self.iscensored(rev):
366 if self.iscensored(rev):
366 return 0
367 return 0
367
368
368 return len(self.revision(node))
369 return len(self.revision(node))
369
370
370 def iscensored(self, rev):
371 def iscensored(self, rev):
371 validaterev(rev)
372 validaterev(rev)
372
373
373 return self._flags(rev) & revlog.REVIDX_ISCENSORED
374 return self._flags(rev) & revlog.REVIDX_ISCENSORED
374
375
375 def commonancestorsheads(self, a, b):
376 def commonancestorsheads(self, a, b):
376 validatenode(a)
377 validatenode(a)
377 validatenode(b)
378 validatenode(b)
378
379
379 a = self.rev(a)
380 a = self.rev(a)
380 b = self.rev(b)
381 b = self.rev(b)
381
382
382 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
383 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
383 return pycompat.maplist(self.node, ancestors)
384 return pycompat.maplist(self.node, ancestors)
384
385
385 def descendants(self, revs):
386 def descendants(self, revs):
386 # This is a copy of revlog.descendants()
387 # This is a copy of revlog.descendants()
387 first = min(revs)
388 first = min(revs)
388 if first == nullrev:
389 if first == nullrev:
389 for i in self:
390 for i in self:
390 yield i
391 yield i
391 return
392 return
392
393
393 seen = set(revs)
394 seen = set(revs)
394 for i in self.revs(start=first + 1):
395 for i in self.revs(start=first + 1):
395 for x in self.parentrevs(i):
396 for x in self.parentrevs(i):
396 if x != nullrev and x in seen:
397 if x != nullrev and x in seen:
397 seen.add(i)
398 seen.add(i)
398 yield i
399 yield i
399 break
400 break
400
401
401 # Required by verify.
402 # Required by verify.
402 def files(self):
403 def files(self):
403 entries = self._svfs.listdir(self._storepath)
404 entries = self._svfs.listdir(self._storepath)
404
405
405 # Strip out undo.backup.* files created as part of transaction
406 # Strip out undo.backup.* files created as part of transaction
406 # recording.
407 # recording.
407 entries = [f for f in entries if not f.startswith('undo.backup.')]
408 entries = [f for f in entries if not f.startswith('undo.backup.')]
408
409
409 return [b'/'.join((self._storepath, f)) for f in entries]
410 return [b'/'.join((self._storepath, f)) for f in entries]
410
411
411 def add(self, text, meta, transaction, linkrev, p1, p2):
412 def add(self, text, meta, transaction, linkrev, p1, p2):
412 if meta or text.startswith(b'\1\n'):
413 if meta or text.startswith(b'\1\n'):
413 text = revlog.packmeta(meta, text)
414 text = revlog.packmeta(meta, text)
414
415
415 return self.addrevision(text, transaction, linkrev, p1, p2)
416 return self.addrevision(text, transaction, linkrev, p1, p2)
416
417
417 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
418 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
418 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
419 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
419 validatenode(p1)
420 validatenode(p1)
420 validatenode(p2)
421 validatenode(p2)
421
422
422 if flags:
423 if flags:
423 node = node or revlog.hash(text, p1, p2)
424 node = node or storageutil.hashrevisionsha1(text, p1, p2)
424
425
425 rawtext, validatehash = self._processflags(text, flags, 'write')
426 rawtext, validatehash = self._processflags(text, flags, 'write')
426
427
427 node = node or revlog.hash(text, p1, p2)
428 node = node or storageutil.hashrevisionsha1(text, p1, p2)
428
429
429 if node in self._indexbynode:
430 if node in self._indexbynode:
430 return node
431 return node
431
432
432 if validatehash:
433 if validatehash:
433 self.checkhash(rawtext, node, p1=p1, p2=p2)
434 self.checkhash(rawtext, node, p1=p1, p2=p2)
434
435
435 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
436 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
436 flags)
437 flags)
437
438
438 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
439 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
439 transaction.addbackup(self._indexpath)
440 transaction.addbackup(self._indexpath)
440
441
441 path = b'/'.join([self._storepath, hex(node)])
442 path = b'/'.join([self._storepath, hex(node)])
442
443
443 self._svfs.write(path, rawtext)
444 self._svfs.write(path, rawtext)
444
445
445 self._indexdata.append({
446 self._indexdata.append({
446 b'node': node,
447 b'node': node,
447 b'p1': p1,
448 b'p1': p1,
448 b'p2': p2,
449 b'p2': p2,
449 b'linkrev': link,
450 b'linkrev': link,
450 b'flags': flags,
451 b'flags': flags,
451 })
452 })
452
453
453 self._reflectindexupdate()
454 self._reflectindexupdate()
454
455
455 return node
456 return node
456
457
457 def _reflectindexupdate(self):
458 def _reflectindexupdate(self):
458 self._refreshindex()
459 self._refreshindex()
459 self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))
460 self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))
460
461
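A sketch of the on-disk index this store maintains: a CBOR-encoded list of per-revision dicts, serialized in revision order by _reflectindexupdate() above. The sample node value is illustrative:

from mercurial.node import nullid
from mercurial.thirdparty import cbor

entry = {
    b'node': b'\x11' * 20,  # illustrative 20-byte node
    b'p1': nullid,
    b'p2': nullid,
    b'linkrev': 0,
    b'flags': 0,
}
blob = cbor.dumps([entry])
assert cbor.loads(blob)[0][b'linkrev'] == 0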
461 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
462 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
462 nodes = []
463 nodes = []
463
464
464 transaction.addbackup(self._indexpath)
465 transaction.addbackup(self._indexpath)
465
466
466 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
467 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
467 linkrev = linkmapper(linknode)
468 linkrev = linkmapper(linknode)
468 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
469 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
469
470
470 nodes.append(node)
471 nodes.append(node)
471
472
472 if node in self._indexbynode:
473 if node in self._indexbynode:
473 continue
474 continue
474
475
475 # Need to resolve the fulltext from the delta base.
476 # Need to resolve the fulltext from the delta base.
476 if deltabase == nullid:
477 if deltabase == nullid:
477 text = mdiff.patch(b'', delta)
478 text = mdiff.patch(b'', delta)
478 else:
479 else:
479 text = mdiff.patch(self.revision(deltabase), delta)
480 text = mdiff.patch(self.revision(deltabase), delta)
480
481
481 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
482 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
482 flags)
483 flags)
483
484
484 if addrevisioncb:
485 if addrevisioncb:
485 addrevisioncb(self, node)
486 addrevisioncb(self, node)
486
487
487 return nodes
488 return nodes
488
489
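A sketch of the fulltext resolution addgroup() performs: mdiff.patch applies a binary delta to its base text, and a delta whose base is nullid patches the empty string. mdiff.textdiff (used by revdiff() below) is the inverse operation:

from mercurial import mdiff

base = b'line 1\nline 2\n'
new = b'line 1\nline 2 changed\n'
delta = mdiff.textdiff(base, new)   # binary delta, as carried in a group
assert mdiff.patch(base, delta) == new
assert mdiff.patch(b'', mdiff.textdiff(b'', new)) == new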
489 def revdiff(self, rev1, rev2):
490 def revdiff(self, rev1, rev2):
490 validaterev(rev1)
491 validaterev(rev1)
491 validaterev(rev2)
492 validaterev(rev2)
492
493
493 node1 = self.node(rev1)
494 node1 = self.node(rev1)
494 node2 = self.node(rev2)
495 node2 = self.node(rev2)
495
496
496 return mdiff.textdiff(self.revision(node1, raw=True),
497 return mdiff.textdiff(self.revision(node1, raw=True),
497 self.revision(node2, raw=True))
498 self.revision(node2, raw=True))
498
499
499 def heads(self, start=None, stop=None):
500 def heads(self, start=None, stop=None):
500 # This is copied from revlog.py.
501 # This is copied from revlog.py.
501 if start is None and stop is None:
502 if start is None and stop is None:
502 if not len(self):
503 if not len(self):
503 return [nullid]
504 return [nullid]
504 return [self.node(r) for r in self.headrevs()]
505 return [self.node(r) for r in self.headrevs()]
505
506
506 if start is None:
507 if start is None:
507 start = nullid
508 start = nullid
508 if stop is None:
509 if stop is None:
509 stop = []
510 stop = []
510 stoprevs = set([self.rev(n) for n in stop])
511 stoprevs = set([self.rev(n) for n in stop])
511 startrev = self.rev(start)
512 startrev = self.rev(start)
512 reachable = {startrev}
513 reachable = {startrev}
513 heads = {startrev}
514 heads = {startrev}
514
515
515 parentrevs = self.parentrevs
516 parentrevs = self.parentrevs
516 for r in self.revs(start=startrev + 1):
517 for r in self.revs(start=startrev + 1):
517 for p in parentrevs(r):
518 for p in parentrevs(r):
518 if p in reachable:
519 if p in reachable:
519 if r not in stoprevs:
520 if r not in stoprevs:
520 reachable.add(r)
521 reachable.add(r)
521 heads.add(r)
522 heads.add(r)
522 if p in heads and p not in stoprevs:
523 if p in heads and p not in stoprevs:
523 heads.remove(p)
524 heads.remove(p)
524
525
525 return [self.node(r) for r in heads]
526 return [self.node(r) for r in heads]
526
527
527 def children(self, node):
528 def children(self, node):
528 validatenode(node)
529 validatenode(node)
529
530
530 # This is a copy of revlog.children().
531 # This is a copy of revlog.children().
531 c = []
532 c = []
532 p = self.rev(node)
533 p = self.rev(node)
533 for r in self.revs(start=p + 1):
534 for r in self.revs(start=p + 1):
534 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
535 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
535 if prevs:
536 if prevs:
536 for pr in prevs:
537 for pr in prevs:
537 if pr == p:
538 if pr == p:
538 c.append(self.node(r))
539 c.append(self.node(r))
539 elif p == nullrev:
540 elif p == nullrev:
540 c.append(self.node(r))
541 c.append(self.node(r))
541 return c
542 return c
542
543
543 def getstrippoint(self, minlink):
544 def getstrippoint(self, minlink):
544
545
545 # This is largely a copy of revlog.getstrippoint().
546 # This is largely a copy of revlog.getstrippoint().
546 brokenrevs = set()
547 brokenrevs = set()
547 strippoint = len(self)
548 strippoint = len(self)
548
549
549 heads = {}
550 heads = {}
550 futurelargelinkrevs = set()
551 futurelargelinkrevs = set()
551 for head in self.heads():
552 for head in self.heads():
552 headlinkrev = self.linkrev(self.rev(head))
553 headlinkrev = self.linkrev(self.rev(head))
553 heads[head] = headlinkrev
554 heads[head] = headlinkrev
554 if headlinkrev >= minlink:
555 if headlinkrev >= minlink:
555 futurelargelinkrevs.add(headlinkrev)
556 futurelargelinkrevs.add(headlinkrev)
556
557
557 # This algorithm involves walking down the rev graph, starting at the
558 # This algorithm involves walking down the rev graph, starting at the
558 # heads. Since the revs are topologically sorted according to linkrev,
559 # heads. Since the revs are topologically sorted according to linkrev,
559 # once all head linkrevs are below the minlink, we know there are
560 # once all head linkrevs are below the minlink, we know there are
560 # no more revs that could have a linkrev greater than minlink.
561 # no more revs that could have a linkrev greater than minlink.
561 # So we can stop walking.
562 # So we can stop walking.
562 while futurelargelinkrevs:
563 while futurelargelinkrevs:
563 strippoint -= 1
564 strippoint -= 1
564 linkrev = heads.pop(strippoint)
565 linkrev = heads.pop(strippoint)
565
566
566 if linkrev < minlink:
567 if linkrev < minlink:
567 brokenrevs.add(strippoint)
568 brokenrevs.add(strippoint)
568 else:
569 else:
569 futurelargelinkrevs.remove(linkrev)
570 futurelargelinkrevs.remove(linkrev)
570
571
571 for p in self.parentrevs(strippoint):
572 for p in self.parentrevs(strippoint):
572 if p != nullrev:
573 if p != nullrev:
573 plinkrev = self.linkrev(p)
574 plinkrev = self.linkrev(p)
574 heads[p] = plinkrev
575 heads[p] = plinkrev
575 if plinkrev >= minlink:
576 if plinkrev >= minlink:
576 futurelargelinkrevs.add(plinkrev)
577 futurelargelinkrevs.add(plinkrev)
577
578
578 return strippoint, brokenrevs
579 return strippoint, brokenrevs
579
580
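In the common case where linkrevs increase monotonically with revision numbers, the walk above reduces to finding the first revision whose linkrev reaches minlink; the head-walking version additionally collects brokenrevs when linkrevs interleave. A toy illustration of the monotonic case:

linkrevs = [0, 1, 1, 2, 3]          # linkrev per revision, monotonic
minlink = 2
strippoint = next(r for r, lr in enumerate(linkrevs) if lr >= minlink)
assert strippoint == 3              # revisions 3 and 4 get stripped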
580 def strip(self, minlink, transaction):
581 def strip(self, minlink, transaction):
581 if not len(self):
582 if not len(self):
582 return
583 return
583
584
584 rev, _ignored = self.getstrippoint(minlink)
585 rev, _ignored = self.getstrippoint(minlink)
585 if rev == len(self):
586 if rev == len(self):
586 return
587 return
587
588
588 # Purge index data starting at the requested revision.
589 # Purge index data starting at the requested revision.
589 self._indexdata[rev:] = []
590 self._indexdata[rev:] = []
590 self._reflectindexupdate()
591 self._reflectindexupdate()
591
592
592 def issimplestorefile(f, kind, st):
593 def issimplestorefile(f, kind, st):
593 if kind != stat.S_IFREG:
594 if kind != stat.S_IFREG:
594 return False
595 return False
595
596
596 if store.isrevlog(f, kind, st):
597 if store.isrevlog(f, kind, st):
597 return False
598 return False
598
599
599 # Ignore transaction undo files.
600 # Ignore transaction undo files.
600 if f.startswith('undo.'):
601 if f.startswith('undo.'):
601 return False
602 return False
602
603
603 # Otherwise assume it belongs to the simple store.
604 # Otherwise assume it belongs to the simple store.
604 return True
605 return True
605
606
606 class simplestore(store.encodedstore):
607 class simplestore(store.encodedstore):
607 def datafiles(self):
608 def datafiles(self):
608 for x in super(simplestore, self).datafiles():
609 for x in super(simplestore, self).datafiles():
609 yield x
610 yield x
610
611
611 # Supplement with non-revlog files.
612 # Supplement with non-revlog files.
612 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
613 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
613
614
614 for unencoded, encoded, size in extrafiles:
615 for unencoded, encoded, size in extrafiles:
615 try:
616 try:
616 unencoded = store.decodefilename(unencoded)
617 unencoded = store.decodefilename(unencoded)
617 except KeyError:
618 except KeyError:
618 unencoded = None
619 unencoded = None
619
620
620 yield unencoded, encoded, size
621 yield unencoded, encoded, size
621
622
622 def reposetup(ui, repo):
623 def reposetup(ui, repo):
623 if not repo.local():
624 if not repo.local():
624 return
625 return
625
626
626 if isinstance(repo, bundlerepo.bundlerepository):
627 if isinstance(repo, bundlerepo.bundlerepository):
627 raise error.Abort(_('cannot use simple store with bundlerepo'))
628 raise error.Abort(_('cannot use simple store with bundlerepo'))
628
629
629 class simplestorerepo(repo.__class__):
630 class simplestorerepo(repo.__class__):
630 def file(self, f):
631 def file(self, f):
631 return filestorage(self.svfs, f)
632 return filestorage(self.svfs, f)
632
633
633 repo.__class__ = simplestorerepo
634 repo.__class__ = simplestorerepo
634
635
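Once reposetup() has rebound repo.__class__, every tracked path resolves to this extension's filestorage. A hedged usage sketch (repo stands for any local repository object; the path is illustrative):

fl = repo.file(b'dir/file.txt')     # filestorage, not a revlog filelog
if len(fl):
    node = fl.node(0)               # node of the first stored revision
    data = fl.revision(node)        # fulltext, hash-verified on read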
635 def featuresetup(ui, supported):
636 def featuresetup(ui, supported):
636 supported.add(REQUIREMENT)
637 supported.add(REQUIREMENT)
637
638
638 def newreporequirements(orig, ui):
639 def newreporequirements(orig, ui):
639 """Modifies default requirements for new repos to use the simple store."""
640 """Modifies default requirements for new repos to use the simple store."""
640 requirements = orig(ui)
641 requirements = orig(ui)
641
642
642 # These requirements are only used to affect creation of the store
643 # These requirements are only used to affect creation of the store
643 # object. We have our own store. So we can remove them.
644 # object. We have our own store. So we can remove them.
644 # TODO do this once we feel like taking the test hit.
645 # TODO do this once we feel like taking the test hit.
645 #if 'fncache' in requirements:
646 #if 'fncache' in requirements:
646 # requirements.remove('fncache')
647 # requirements.remove('fncache')
647 #if 'dotencode' in requirements:
648 #if 'dotencode' in requirements:
648 # requirements.remove('dotencode')
649 # requirements.remove('dotencode')
649
650
650 requirements.add(REQUIREMENT)
651 requirements.add(REQUIREMENT)
651
652
652 return requirements
653 return requirements
653
654
654 def makestore(orig, requirements, path, vfstype):
655 def makestore(orig, requirements, path, vfstype):
655 if REQUIREMENT not in requirements:
656 if REQUIREMENT not in requirements:
656 return orig(requirements, path, vfstype)
657 return orig(requirements, path, vfstype)
657
658
658 return simplestore(path, vfstype)
659 return simplestore(path, vfstype)
659
660
660 def verifierinit(orig, self, *args, **kwargs):
661 def verifierinit(orig, self, *args, **kwargs):
661 orig(self, *args, **kwargs)
662 orig(self, *args, **kwargs)
662
663
663 # We don't care that files in the store don't align with what is
664 # We don't care that files in the store don't align with what is
664 # advertised. So suppress these warnings.
665 # advertised. So suppress these warnings.
665 self.warnorphanstorefiles = False
666 self.warnorphanstorefiles = False
666
667
667 def extsetup(ui):
668 def extsetup(ui):
668 localrepo.featuresetupfuncs.add(featuresetup)
669 localrepo.featuresetupfuncs.add(featuresetup)
669
670
670 extensions.wrapfunction(localrepo, 'newreporequirements',
671 extensions.wrapfunction(localrepo, 'newreporequirements',
671 newreporequirements)
672 newreporequirements)
672 extensions.wrapfunction(store, 'store', makestore)
673 extensions.wrapfunction(store, 'store', makestore)
673 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
674 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)