rawdata: introduce a `rawdata` method on revlog...
marmoute
r42945:415e4136 default
@@ -1,2695 +1,2702 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariants:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and in REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, the "rawtext" returned by "write" should be used
      to generate the hash. Usually, "write" and "read" return different
      booleans, and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor
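
# Illustrative sketch (not part of the original change): what a registration
# looks like. The helper names below are hypothetical; the flag is the real
# REVIDX_EXTSTORED, and the tuple order is (read, write, raw) as documented
# in addflagprocessor() above:
#
#     def _exampleread(rl, rawtext):
#         return rawtext, False    # returned text is not usable for hashing
#     def _examplewrite(rl, text):
#         return text, True        # stored rawtext can be used for hashing
#     def _exampleraw(rl, rawtext):
#         return True              # same boolean as "write"
#     addflagprocessor(REVIDX_EXTSTORED,
#                      (_exampleread, _examplewrite, _exampleraw))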

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
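
# Illustrative sketch (not part of the original change): the first field of
# an index entry packs a 48-bit data offset and 16 bits of flags into one
# integer; getoffset() and gettype() invert offset_type():
#
#     packed = offset_type(4096, 0)    # == 4096 << 16
#     assert getoffset(packed) == 4096
#     assert gettype(packed) == 0
#     offset_type(4096, 1)             # ValueError: bit 0 is not a known flag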

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
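
# Illustrative note (not part of the original change): every v0 record is
# fixed-size, 4 * 4 + 3 * 20 = 76 bytes, which is what lets parseindex()
# below slice the raw file every indexformatv0.size bytes (`record` here is
# any hypothetical 76-byte slice of an index file):
#
#     assert indexformatv0.size == 76
#     offset, complen, base, link = indexformatv0_unpack(record)[:4]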

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
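
# Illustrative note (not part of the original change): a v1 ("ng") record is
# 64 bytes: ">Q" packs the 6-byte offset plus the 2-byte flags (see
# offset_type()), the six "i" fields are the two lengths, base rev, link rev
# and parent revs, and "20s12x" pads the 20-byte nodeid out to 32 bytes:
#
#     assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64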

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
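
# Illustrative sketch (not part of the original change): a revlog index has
# no standalone header. Since rev 0 always starts at offset 0, packentry()
# reuses the first 4 bytes of the first entry to store the version and
# feature flags, which _loadindex() later recovers. With a hypothetical
# 20-byte nodeid `node0 = b'\0' * 20`:
#
#     entry = (offset_type(0, 0), 11, 11, 0, 0, -1, -1, node0)
#     data = revlogio().packentry(entry, None, REVLOGV1 | FLAG_INLINE_DATA, 0)
#     assert versionformat_unpack(data[:4])[0] == REVLOGV1 | FLAG_INLINE_DATA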

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False,
                 upperboundcomp=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'zlib.level' in opts:
            self._compengineopts['zlib.level'] = opts['zlib.level']
        if 'zstd.level' in opts:
            self._compengineopts['zstd.level'] = opts['zstd.level']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecache are the same
            # object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
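
    # Illustrative sketch (not part of the original change): with general
    # delta each entry's base (entry[3]) can point at any earlier rev, so
    # the chain is followed through the index; without it the base is
    # implicitly the previous rev. For a hypothetical general-delta revlog
    # `rl` where rev 5 deltas against rev 3, and rev 3 against full-text
    # rev 1:
    #
    #     chain, stopped = rl._deltachain(5)
    #     # chain == [1, 3, 5], stopped == False
    #     chain, stopped = rl._deltachain(5, stoprev=3)
    #     # chain == [5], stopped == True (the walk stopped upon hitting 3)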
818
818
819 def ancestors(self, revs, stoprev=0, inclusive=False):
819 def ancestors(self, revs, stoprev=0, inclusive=False):
820 """Generate the ancestors of 'revs' in reverse revision order.
820 """Generate the ancestors of 'revs' in reverse revision order.
821 Does not generate revs lower than stoprev.
821 Does not generate revs lower than stoprev.
822
822
823 See the documentation for ancestor.lazyancestors for more details."""
823 See the documentation for ancestor.lazyancestors for more details."""
824
824
825 # first, make sure start revisions aren't filtered
825 # first, make sure start revisions aren't filtered
826 revs = list(revs)
826 revs = list(revs)
827 checkrev = self.node
827 checkrev = self.node
828 for r in revs:
828 for r in revs:
829 checkrev(r)
829 checkrev(r)
830 # and we're sure ancestors aren't filtered as well
830 # and we're sure ancestors aren't filtered as well
831
831
832 if rustancestor is not None:
832 if rustancestor is not None:
833 lazyancestors = rustancestor.LazyAncestors
833 lazyancestors = rustancestor.LazyAncestors
834 arg = self.index
834 arg = self.index
835 elif util.safehasattr(parsers, 'rustlazyancestors'):
835 elif util.safehasattr(parsers, 'rustlazyancestors'):
836 lazyancestors = ancestor.rustlazyancestors
836 lazyancestors = ancestor.rustlazyancestors
837 arg = self.index
837 arg = self.index
838 else:
838 else:
839 lazyancestors = ancestor.lazyancestors
839 lazyancestors = ancestor.lazyancestors
840 arg = self._uncheckedparentrevs
840 arg = self._uncheckedparentrevs
841 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
841 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
842
842
843 def descendants(self, revs):
843 def descendants(self, revs):
844 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
844 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
845
845
846 def findcommonmissing(self, common=None, heads=None):
846 def findcommonmissing(self, common=None, heads=None):
847 """Return a tuple of the ancestors of common and the ancestors of heads
847 """Return a tuple of the ancestors of common and the ancestors of heads
848 that are not ancestors of common. In revset terminology, we return the
848 that are not ancestors of common. In revset terminology, we return the
849 tuple:
849 tuple:
850
850
851 ::common, (::heads) - (::common)
851 ::common, (::heads) - (::common)
852
852
853 The list is sorted by revision number, meaning it is
853 The list is sorted by revision number, meaning it is
854 topologically sorted.
854 topologically sorted.
855
855
856 'heads' and 'common' are both lists of node IDs. If heads is
856 'heads' and 'common' are both lists of node IDs. If heads is
857 not supplied, uses all of the revlog's heads. If common is not
857 not supplied, uses all of the revlog's heads. If common is not
858 supplied, uses nullid."""
858 supplied, uses nullid."""
859 if common is None:
859 if common is None:
860 common = [nullid]
860 common = [nullid]
861 if heads is None:
861 if heads is None:
862 heads = self.heads()
862 heads = self.heads()
863
863
864 common = [self.rev(n) for n in common]
864 common = [self.rev(n) for n in common]
865 heads = [self.rev(n) for n in heads]
865 heads = [self.rev(n) for n in heads]
866
866
867 # we want the ancestors, but inclusive
867 # we want the ancestors, but inclusive
868 class lazyset(object):
868 class lazyset(object):
869 def __init__(self, lazyvalues):
869 def __init__(self, lazyvalues):
870 self.addedvalues = set()
870 self.addedvalues = set()
871 self.lazyvalues = lazyvalues
871 self.lazyvalues = lazyvalues
872
872
873 def __contains__(self, value):
873 def __contains__(self, value):
874 return value in self.addedvalues or value in self.lazyvalues
874 return value in self.addedvalues or value in self.lazyvalues
875
875
876 def __iter__(self):
876 def __iter__(self):
877 added = self.addedvalues
877 added = self.addedvalues
878 for r in added:
878 for r in added:
879 yield r
879 yield r
880 for r in self.lazyvalues:
880 for r in self.lazyvalues:
881 if not r in added:
881 if not r in added:
882 yield r
882 yield r
883
883
884 def add(self, value):
884 def add(self, value):
885 self.addedvalues.add(value)
885 self.addedvalues.add(value)
886
886
887 def update(self, values):
887 def update(self, values):
888 self.addedvalues.update(values)
888 self.addedvalues.update(values)
889
889
890 has = lazyset(self.ancestors(common))
890 has = lazyset(self.ancestors(common))
891 has.add(nullrev)
891 has.add(nullrev)
892 has.update(common)
892 has.update(common)
893
893
894 # take all ancestors from heads that aren't in has
894 # take all ancestors from heads that aren't in has
895 missing = set()
895 missing = set()
896 visit = collections.deque(r for r in heads if r not in has)
896 visit = collections.deque(r for r in heads if r not in has)
897 while visit:
897 while visit:
898 r = visit.popleft()
898 r = visit.popleft()
899 if r in missing:
899 if r in missing:
900 continue
900 continue
901 else:
901 else:
902 missing.add(r)
902 missing.add(r)
903 for p in self.parentrevs(r):
903 for p in self.parentrevs(r):
904 if p not in has:
904 if p not in has:
905 visit.append(p)
905 visit.append(p)
906 missing = list(missing)
906 missing = list(missing)
907 missing.sort()
907 missing.sort()
908 return has, [self.node(miss) for miss in missing]
908 return has, [self.node(miss) for miss in missing]
909
909
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

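    # Illustrative sketch (not part of the module): findmissingrevs() and
    # findmissing() answer the same question at the revision-number and node
    # level. Assuming `rl` is a revlog instance and `c`, `h` are known nodes:
    #
    #   missingrevs = rl.findmissingrevs(common=[rl.rev(c)],
    #                                    heads=[rl.rev(h)])
    #   missingnodes = rl.findmissing(common=[c], heads=[h])
    #   assert missingnodes == [rl.node(r) for r in missingrevs]
    #
    # Both delegate the actual walk to incrementalmissingrevs() above.
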
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

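    # Illustrative sketch (assumption: `rl` is a populated revlog, and a, b,
    # c are nodes of a linear history a -> b -> c): nodesbetween() reports
    # the connecting path plus the subsets of the inputs that were reachable:
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[c])
    #   # nodes == [a, b, c], outroots == [a], outheads == [c]
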
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

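    # Illustrative sketch (assumption: `rl` is an unfiltered revlog):
    # headrevs() and heads() are the rev and node views of the same
    # computation, so with no start/stop arguments the following holds:
    #
    #   assert rl.heads() == [rl.node(r) for r in rl.headrevs()]
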
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

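    # Illustrative sketch of the isancestorrev() fast paths (not part of the
    # module): nullrev precedes everything, a rev is its own ancestor, and an
    # ancestor can never have a larger rev number than its descendant:
    #
    #   assert rl.isancestorrev(nullrev, anyrev)
    #   assert rl.isancestorrev(anyrev, anyrev)
    #   # rl.isancestorrev(b, a) is False whenever b > a
    #
    # Only when none of these apply does it pay for a reachableroots() walk.
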
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads, includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

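    # Illustrative sketch (assumption: `rl` is a revlog and `node` one of its
    # entries): shortest() grows a hex prefix until _partialmatch() stops
    # reporting ambiguity, then lengthens it past any all-'f' prefix that
    # could be confused with the virtual working-directory id:
    #
    #   prefix = rl.shortest(node, minlength=4)
    #   assert rl.lookup(prefix) == node
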
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

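    # A minimal sketch of the window arithmetic above (standalone numbers,
    # assuming a 64 KiB chunk cache size): the start offset is rounded down
    # and the end rounded up to cache-size boundaries, so neighbouring reads
    # hit the cached window instead of the file:
    #
    #   cachesize = 65536
    #   offset, length = 70000, 100
    #   realoffset = offset & ~(cachesize - 1)            # 65536
    #   reallength = (((offset + length + cachesize)
    #                  & ~(cachesize - 1)) - realoffset)  # 65536
    #   # the read covers [65536, 131072), one aligned window around the
    #   # requested 100 bytes
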
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

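    # Illustrative sketch of the inline adjustment above (not part of the
    # module): in an inline revlog the index entries (self._io.size bytes
    # each) are interleaved with the revision data, so the physical offset of
    # a revision's data is shifted by one index entry per revision up to and
    # including it:
    #
    #   # assuming a 64-byte index entry size:
    #   #   physical_start = logical_start + (startrev + 1) * 64
    #   #   physical_end   = logical_end   + (endrev + 1) * 64
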
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

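    # Illustrative sketch (not part of the module): in a sparse revlog a
    # snapshot is a revision whose chain of delta bases reaches nullrev
    # without ever passing through one of its own parents, so issnapshot()
    # recurses along delta bases:
    #
    #   rev -> base (intermediate snapshot) -> ... -> nullrev   # snapshot
    #   rev -> p1(rev) -> ...                                   # not a snapshot
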
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

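    # Illustrative sketch (assumption: `rl` is a revlog): when rev2 is
    # already stored as a delta against rev1, revdiff() returns the stored
    # chunk verbatim; otherwise it recomputes a binary diff of the two raw
    # texts. Either way the delta patches cleanly:
    #
    #   delta = rl.revdiff(rev1, rev2)
    #   assert mdiff.patch(rl.revision(rev1, raw=True), delta) == \
    #          rl.revision(rev2, raw=True)
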
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def rawdata(self, nodeorrev, _df=None, raw=False):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

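    # Illustrative sketch (not part of the module): rawdata() is the new
    # spelling of revision(..., raw=True); it returns the bytes as stored,
    # before any flag-processor "read" transform is applied, so for now:
    #
    #   assert rl.rawdata(rev) == rl.revision(rev, raw=True)
    #
    # Callers interested in the interpreted text keep using revision().
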
1723 def hash(self, text, p1, p2):
1730 def hash(self, text, p1, p2):
1724 """Compute a node hash.
1731 """Compute a node hash.
1725
1732
1726 Available as a function so that subclasses can replace the hash
1733 Available as a function so that subclasses can replace the hash
1727 as needed.
1734 as needed.
1728 """
1735 """
1729 return storageutil.hashrevisionsha1(text, p1, p2)
1736 return storageutil.hashrevisionsha1(text, p1, p2)
1730
1737
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument indicating whether the raw transform
        should be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable because the
        transforms are not commutative.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

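    # Editor's sketch: a flag processor is a 3-tuple of transforms
    # ``(readtransform, writetransform, rawtransform)``. Assuming the
    # REVIDX_EXTSTORED flag and no-op transforms, registration might look
    # like this (the _ext* helper names are hypothetical):
    #
    #   def _extread(rl, text):
    #       return text, True        # (processed text, validatehash)
    #
    #   def _extwrite(rl, text):
    #       return text, True
    #
    #   def _extraw(rl, text):
    #       return False             # rawtext hash should not be verified
    #
    #   addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))
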
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                                        % (self.indexfile,
                                           pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

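    # Editor's note: an inline revlog interleaves each data chunk with its
    # index entry in the single ``.i`` file; the conversion above rewrites
    # that into a separate index plus data file. A rough picture:
    #
    #   inline:  [entry 0][chunk 0][entry 1][chunk 1]...   (one .i file)
    #   split:   [entry 0][entry 1]...                     (.i)
    #            [chunk 0][chunk 1]...                     (.d)
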
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash() in
            such a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

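    # Editor's sketch: a minimal caller, assuming ``rl`` is an open revlog and
    # ``tr`` an active transaction (both names hypothetical). Parents default
    # to nullid for a root revision:
    #
    #   node = rl.addrevision(b'file content\n', tr, linkrev, nullid, nullid)
    #   assert node in rl.nodemap
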
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

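    # Editor's note on chunk headers, as implied by compress() above and
    # decompress() below: the first byte of a stored chunk routes
    # decompression.
    #
    #   'x'  - zlib-compressed data ('x' is the first byte of a zlib stream)
    #   'u'  - uncompressed data, the header byte must be stripped
    #   '\0' - uncompressed data stored verbatim (no header to strip)
    #   else - looked up among the registered compression engines
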
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

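    # Editor's note: the in-memory index entry ``e`` built above is an
    # 8-tuple, matching what the code appends:
    #
    #   (offset_type(offset, flags),  # packed data offset + revision flags
    #    deltainfo.deltalen,          # length of the stored (delta) chunk
    #    textlen,                     # length of the full rawtext
    #    deltainfo.base,              # rev the delta chain builds on
    #    link,                        # linkrev into the changelog
    #    p1r, p2r,                    # parent revision numbers
    #    node)                        # this revision's node id
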
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

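    # Editor's note: each element of ``deltas`` unpacks, per the loop above,
    # as a 7-tuple:
    #
    #   (node,       # node id of the revision being added
    #    p1, p2,     # parent node ids
    #    linknode,   # node in the linked revlog, mapped via linkmapper()
    #    deltabase,  # node the delta applies against
    #    delta,      # the delta bytes themselves
    #    flags)      # revision flags; falsy values fall back to
    #                # REVIDX_DEFAULT_FLAGS
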
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

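    # Editor's example, using an illustrative entry size (the real value comes
    # from self._io.size): with 64-byte entries and 10 revisions, a non-inline
    # index should be exactly 640 bytes. A 650-byte index would yield di == 10,
    # and a data file 5 bytes longer than self.end(9) would yield dd == 5.
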
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When unset, the destination revlog's existing setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if (deltareuse != self.DELTAREUSEFULLADD
                        and destrevlog._lazydelta):
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh,
                                                dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

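    # Editor's sketch: recomputing every delta while copying a revlog,
    # assuming ``src`` and ``dst`` are open revlogs and ``tr`` an active
    # transaction (all names hypothetical):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
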
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please '
                                             'report this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

2568 def verifyintegrity(self, state):
2575 def verifyintegrity(self, state):
2569 """Verifies the integrity of the revlog.
2576 """Verifies the integrity of the revlog.
2570
2577
2571 Yields ``revlogproblem`` instances describing problems that are
2578 Yields ``revlogproblem`` instances describing problems that are
2572 found.
2579 found.
2573 """
2580 """
2574 dd, di = self.checksize()
2581 dd, di = self.checksize()
2575 if dd:
2582 if dd:
2576 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2583 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2577 if di:
2584 if di:
2578 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2585 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2579
2586
2580 version = self.version & 0xFFFF
2587 version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
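            #
            # A worked example of the "rename" column (illustrative values,
            # not computed by this loop): with
            #     rawtext = b'\x01\ncopy: a\ncopyrev: <40 hex chars>\n\x01\n'
            #               b'real content'
            # both L1 (rawsize in the index) and L2 (len(rawtext)) cover the
            # whole buffer, while filelog's read() strips the
            # b'\x01\n'-delimited header, so len(read()) == L2 - LM, where
            # LM is the header length including both delimiters.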

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

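    # A usage sketch (hypothetical caller and state values; the real driver
    # is Mercurial's verify machinery):
    #
    #     state = {'expectedversion': REVLOGV1, 'erroroncensored': True}
    #     for problem in somerevlog.verifyintegrity(state):
    #         if problem.error:
    #             ui.warn(problem.error + b'\n')
    #
    # Afterwards, state['skipread'] holds the nodes whose content was not
    # fully checked.
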
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
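
    # Illustrative call (hypothetical revlog instance ``rl``; the numbers
    # are made up):
    #
    #     rl.storageinfo(revisionscount=True, storedsize=True)
    #     # -> {'revisionscount': 42, 'storedsize': 12345}
    #
    # Each piece of information is computed only on request, since e.g.
    # ``storedsize`` stats files on disk and ``trackedsize`` walks every
    # revision.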