##// END OF EJS Templates
revlog: more efficient implementation for issnapshot...
Boris Feld -
r41116:84491ae0 default
parent child Browse files
Show More
@@ -1,2599 +1,2603 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import os
19 import os
20 import struct
20 import struct
21 import zlib
21 import zlib
22
22
23 # import stuff from node for others to import from revlog
23 # import stuff from node for others to import from revlog
24 from .node import (
24 from .node import (
25 bin,
25 bin,
26 hex,
26 hex,
27 nullhex,
27 nullhex,
28 nullid,
28 nullid,
29 nullrev,
29 nullrev,
30 short,
30 short,
31 wdirfilenodeids,
31 wdirfilenodeids,
32 wdirhex,
32 wdirhex,
33 wdirid,
33 wdirid,
34 wdirrev,
34 wdirrev,
35 )
35 )
36 from .i18n import _
36 from .i18n import _
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 REVIDX_DEFAULT_FLAGS,
40 REVIDX_DEFAULT_FLAGS,
41 REVIDX_ELLIPSIS,
41 REVIDX_ELLIPSIS,
42 REVIDX_EXTSTORED,
42 REVIDX_EXTSTORED,
43 REVIDX_FLAGS_ORDER,
43 REVIDX_FLAGS_ORDER,
44 REVIDX_ISCENSORED,
44 REVIDX_ISCENSORED,
45 REVIDX_KNOWN_FLAGS,
45 REVIDX_KNOWN_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 )
55 )
56 from .thirdparty import (
56 from .thirdparty import (
57 attr,
57 attr,
58 )
58 )
59 from . import (
59 from . import (
60 ancestor,
60 ancestor,
61 dagop,
61 dagop,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 storageutil,
75 storageutil,
76 stringutil,
76 stringutil,
77 )
77 )
78
78
79 # blanked usage of all the name to prevent pyflakes constraints
79 # blanked usage of all the name to prevent pyflakes constraints
80 # We need these name available in the module for extensions.
80 # We need these name available in the module for extensions.
81 REVLOGV0
81 REVLOGV0
82 REVLOGV1
82 REVLOGV1
83 REVLOGV2
83 REVLOGV2
84 FLAG_INLINE_DATA
84 FLAG_INLINE_DATA
85 FLAG_GENERALDELTA
85 FLAG_GENERALDELTA
86 REVLOG_DEFAULT_FLAGS
86 REVLOG_DEFAULT_FLAGS
87 REVLOG_DEFAULT_FORMAT
87 REVLOG_DEFAULT_FORMAT
88 REVLOG_DEFAULT_VERSION
88 REVLOG_DEFAULT_VERSION
89 REVLOGV1_FLAGS
89 REVLOGV1_FLAGS
90 REVLOGV2_FLAGS
90 REVLOGV2_FLAGS
91 REVIDX_ISCENSORED
91 REVIDX_ISCENSORED
92 REVIDX_ELLIPSIS
92 REVIDX_ELLIPSIS
93 REVIDX_EXTSTORED
93 REVIDX_EXTSTORED
94 REVIDX_DEFAULT_FLAGS
94 REVIDX_DEFAULT_FLAGS
95 REVIDX_FLAGS_ORDER
95 REVIDX_FLAGS_ORDER
96 REVIDX_KNOWN_FLAGS
96 REVIDX_KNOWN_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
98
98
99 parsers = policy.importmod(r'parsers')
99 parsers = policy.importmod(r'parsers')
100
100
101 # Aliased for performance.
101 # Aliased for performance.
102 _zlibdecompress = zlib.decompress
102 _zlibdecompress = zlib.decompress
103
103
104 # max size of revlog with inline data
104 # max size of revlog with inline data
105 _maxinline = 131072
105 _maxinline = 131072
106 _chunksize = 1048576
106 _chunksize = 1048576
107
107
108 # Store flag processors (cf. 'addflagprocessor()' to register)
108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 _flagprocessors = {
109 _flagprocessors = {
110 REVIDX_ISCENSORED: None,
110 REVIDX_ISCENSORED: None,
111 }
111 }
112
112
113 # Flag processors for REVIDX_ELLIPSIS.
113 # Flag processors for REVIDX_ELLIPSIS.
114 def ellipsisreadprocessor(rl, text):
114 def ellipsisreadprocessor(rl, text):
115 return text, False
115 return text, False
116
116
117 def ellipsiswriteprocessor(rl, text):
117 def ellipsiswriteprocessor(rl, text):
118 return text, False
118 return text, False
119
119
120 def ellipsisrawprocessor(rl, text):
120 def ellipsisrawprocessor(rl, text):
121 return False
121 return False
122
122
123 ellipsisprocessor = (
123 ellipsisprocessor = (
124 ellipsisreadprocessor,
124 ellipsisreadprocessor,
125 ellipsiswriteprocessor,
125 ellipsiswriteprocessor,
126 ellipsisrawprocessor,
126 ellipsisrawprocessor,
127 )
127 )
128
128
129 def addflagprocessor(flag, processor):
129 def addflagprocessor(flag, processor):
130 """Register a flag processor on a revision data flag.
130 """Register a flag processor on a revision data flag.
131
131
132 Invariant:
132 Invariant:
133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 - Only one flag processor can be registered on a specific flag.
135 - Only one flag processor can be registered on a specific flag.
136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 following signatures:
137 following signatures:
138 - (read) f(self, rawtext) -> text, bool
138 - (read) f(self, rawtext) -> text, bool
139 - (write) f(self, text) -> rawtext, bool
139 - (write) f(self, text) -> rawtext, bool
140 - (raw) f(self, rawtext) -> bool
140 - (raw) f(self, rawtext) -> bool
141 "text" is presented to the user. "rawtext" is stored in revlog data, not
141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 directly visible to the user.
142 directly visible to the user.
143 The boolean returned by these transforms is used to determine whether
143 The boolean returned by these transforms is used to determine whether
144 the returned text can be used for hash integrity checking. For example,
144 the returned text can be used for hash integrity checking. For example,
145 if "write" returns False, then "text" is used to generate hash. If
145 if "write" returns False, then "text" is used to generate hash. If
146 "write" returns True, that basically means "rawtext" returned by "write"
146 "write" returns True, that basically means "rawtext" returned by "write"
147 should be used to generate hash. Usually, "write" and "read" return
147 should be used to generate hash. Usually, "write" and "read" return
148 different booleans. And "raw" returns a same boolean as "write".
148 different booleans. And "raw" returns a same boolean as "write".
149
149
150 Note: The 'raw' transform is used for changegroup generation and in some
150 Note: The 'raw' transform is used for changegroup generation and in some
151 debug commands. In this case the transform only indicates whether the
151 debug commands. In this case the transform only indicates whether the
152 contents can be used for hash integrity checks.
152 contents can be used for hash integrity checks.
153 """
153 """
154 _insertflagprocessor(flag, processor, _flagprocessors)
154 _insertflagprocessor(flag, processor, _flagprocessors)
155
155
156 def _insertflagprocessor(flag, processor, flagprocessors):
156 def _insertflagprocessor(flag, processor, flagprocessors):
157 if not flag & REVIDX_KNOWN_FLAGS:
157 if not flag & REVIDX_KNOWN_FLAGS:
158 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
158 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
159 raise error.ProgrammingError(msg)
159 raise error.ProgrammingError(msg)
160 if flag not in REVIDX_FLAGS_ORDER:
160 if flag not in REVIDX_FLAGS_ORDER:
161 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
161 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
162 raise error.ProgrammingError(msg)
162 raise error.ProgrammingError(msg)
163 if flag in flagprocessors:
163 if flag in flagprocessors:
164 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
164 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
165 raise error.Abort(msg)
165 raise error.Abort(msg)
166 flagprocessors[flag] = processor
166 flagprocessors[flag] = processor
167
167
168 def getoffset(q):
168 def getoffset(q):
169 return int(q >> 16)
169 return int(q >> 16)
170
170
171 def gettype(q):
171 def gettype(q):
172 return int(q & 0xFFFF)
172 return int(q & 0xFFFF)
173
173
174 def offset_type(offset, type):
174 def offset_type(offset, type):
175 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
175 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
176 raise ValueError('unknown revlog index flags')
176 raise ValueError('unknown revlog index flags')
177 return int(int(offset) << 16 | type)
177 return int(int(offset) << 16 | type)
178
178
179 @attr.s(slots=True, frozen=True)
179 @attr.s(slots=True, frozen=True)
180 class _revisioninfo(object):
180 class _revisioninfo(object):
181 """Information about a revision that allows building its fulltext
181 """Information about a revision that allows building its fulltext
182 node: expected hash of the revision
182 node: expected hash of the revision
183 p1, p2: parent revs of the revision
183 p1, p2: parent revs of the revision
184 btext: built text cache consisting of a one-element list
184 btext: built text cache consisting of a one-element list
185 cachedelta: (baserev, uncompressed_delta) or None
185 cachedelta: (baserev, uncompressed_delta) or None
186 flags: flags associated to the revision storage
186 flags: flags associated to the revision storage
187
187
188 One of btext[0] or cachedelta must be set.
188 One of btext[0] or cachedelta must be set.
189 """
189 """
190 node = attr.ib()
190 node = attr.ib()
191 p1 = attr.ib()
191 p1 = attr.ib()
192 p2 = attr.ib()
192 p2 = attr.ib()
193 btext = attr.ib()
193 btext = attr.ib()
194 textlen = attr.ib()
194 textlen = attr.ib()
195 cachedelta = attr.ib()
195 cachedelta = attr.ib()
196 flags = attr.ib()
196 flags = attr.ib()
197
197
198 @interfaceutil.implementer(repository.irevisiondelta)
198 @interfaceutil.implementer(repository.irevisiondelta)
199 @attr.s(slots=True)
199 @attr.s(slots=True)
200 class revlogrevisiondelta(object):
200 class revlogrevisiondelta(object):
201 node = attr.ib()
201 node = attr.ib()
202 p1node = attr.ib()
202 p1node = attr.ib()
203 p2node = attr.ib()
203 p2node = attr.ib()
204 basenode = attr.ib()
204 basenode = attr.ib()
205 flags = attr.ib()
205 flags = attr.ib()
206 baserevisionsize = attr.ib()
206 baserevisionsize = attr.ib()
207 revision = attr.ib()
207 revision = attr.ib()
208 delta = attr.ib()
208 delta = attr.ib()
209 linknode = attr.ib(default=None)
209 linknode = attr.ib(default=None)
210
210
211 @interfaceutil.implementer(repository.iverifyproblem)
211 @interfaceutil.implementer(repository.iverifyproblem)
212 @attr.s(frozen=True)
212 @attr.s(frozen=True)
213 class revlogproblem(object):
213 class revlogproblem(object):
214 warning = attr.ib(default=None)
214 warning = attr.ib(default=None)
215 error = attr.ib(default=None)
215 error = attr.ib(default=None)
216 node = attr.ib(default=None)
216 node = attr.ib(default=None)
217
217
218 # index v0:
218 # index v0:
219 # 4 bytes: offset
219 # 4 bytes: offset
220 # 4 bytes: compressed length
220 # 4 bytes: compressed length
221 # 4 bytes: base rev
221 # 4 bytes: base rev
222 # 4 bytes: link rev
222 # 4 bytes: link rev
223 # 20 bytes: parent 1 nodeid
223 # 20 bytes: parent 1 nodeid
224 # 20 bytes: parent 2 nodeid
224 # 20 bytes: parent 2 nodeid
225 # 20 bytes: nodeid
225 # 20 bytes: nodeid
226 indexformatv0 = struct.Struct(">4l20s20s20s")
226 indexformatv0 = struct.Struct(">4l20s20s20s")
227 indexformatv0_pack = indexformatv0.pack
227 indexformatv0_pack = indexformatv0.pack
228 indexformatv0_unpack = indexformatv0.unpack
228 indexformatv0_unpack = indexformatv0.unpack
229
229
230 class revlogoldindex(list):
230 class revlogoldindex(list):
231 def __getitem__(self, i):
231 def __getitem__(self, i):
232 if i == -1:
232 if i == -1:
233 return (0, 0, 0, -1, -1, -1, -1, nullid)
233 return (0, 0, 0, -1, -1, -1, -1, nullid)
234 return list.__getitem__(self, i)
234 return list.__getitem__(self, i)
235
235
236 class revlogoldio(object):
236 class revlogoldio(object):
237 def __init__(self):
237 def __init__(self):
238 self.size = indexformatv0.size
238 self.size = indexformatv0.size
239
239
240 def parseindex(self, data, inline):
240 def parseindex(self, data, inline):
241 s = self.size
241 s = self.size
242 index = []
242 index = []
243 nodemap = {nullid: nullrev}
243 nodemap = {nullid: nullrev}
244 n = off = 0
244 n = off = 0
245 l = len(data)
245 l = len(data)
246 while off + s <= l:
246 while off + s <= l:
247 cur = data[off:off + s]
247 cur = data[off:off + s]
248 off += s
248 off += s
249 e = indexformatv0_unpack(cur)
249 e = indexformatv0_unpack(cur)
250 # transform to revlogv1 format
250 # transform to revlogv1 format
251 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
251 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
252 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
252 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
253 index.append(e2)
253 index.append(e2)
254 nodemap[e[6]] = n
254 nodemap[e[6]] = n
255 n += 1
255 n += 1
256
256
257 return revlogoldindex(index), nodemap, None
257 return revlogoldindex(index), nodemap, None
258
258
259 def packentry(self, entry, node, version, rev):
259 def packentry(self, entry, node, version, rev):
260 if gettype(entry[0]):
260 if gettype(entry[0]):
261 raise error.RevlogError(_('index entry flags need revlog '
261 raise error.RevlogError(_('index entry flags need revlog '
262 'version 1'))
262 'version 1'))
263 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
263 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
264 node(entry[5]), node(entry[6]), entry[7])
264 node(entry[5]), node(entry[6]), entry[7])
265 return indexformatv0_pack(*e2)
265 return indexformatv0_pack(*e2)
266
266
267 # index ng:
267 # index ng:
268 # 6 bytes: offset
268 # 6 bytes: offset
269 # 2 bytes: flags
269 # 2 bytes: flags
270 # 4 bytes: compressed length
270 # 4 bytes: compressed length
271 # 4 bytes: uncompressed length
271 # 4 bytes: uncompressed length
272 # 4 bytes: base rev
272 # 4 bytes: base rev
273 # 4 bytes: link rev
273 # 4 bytes: link rev
274 # 4 bytes: parent 1 rev
274 # 4 bytes: parent 1 rev
275 # 4 bytes: parent 2 rev
275 # 4 bytes: parent 2 rev
276 # 32 bytes: nodeid
276 # 32 bytes: nodeid
277 indexformatng = struct.Struct(">Qiiiiii20s12x")
277 indexformatng = struct.Struct(">Qiiiiii20s12x")
278 indexformatng_pack = indexformatng.pack
278 indexformatng_pack = indexformatng.pack
279 versionformat = struct.Struct(">I")
279 versionformat = struct.Struct(">I")
280 versionformat_pack = versionformat.pack
280 versionformat_pack = versionformat.pack
281 versionformat_unpack = versionformat.unpack
281 versionformat_unpack = versionformat.unpack
282
282
283 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
283 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
284 # signed integer)
284 # signed integer)
285 _maxentrysize = 0x7fffffff
285 _maxentrysize = 0x7fffffff
286
286
287 class revlogio(object):
287 class revlogio(object):
288 def __init__(self):
288 def __init__(self):
289 self.size = indexformatng.size
289 self.size = indexformatng.size
290
290
291 def parseindex(self, data, inline):
291 def parseindex(self, data, inline):
292 # call the C implementation to parse the index data
292 # call the C implementation to parse the index data
293 index, cache = parsers.parse_index2(data, inline)
293 index, cache = parsers.parse_index2(data, inline)
294 return index, getattr(index, 'nodemap', None), cache
294 return index, getattr(index, 'nodemap', None), cache
295
295
296 def packentry(self, entry, node, version, rev):
296 def packentry(self, entry, node, version, rev):
297 p = indexformatng_pack(*entry)
297 p = indexformatng_pack(*entry)
298 if rev == 0:
298 if rev == 0:
299 p = versionformat_pack(version) + p[4:]
299 p = versionformat_pack(version) + p[4:]
300 return p
300 return p
301
301
302 class revlog(object):
302 class revlog(object):
303 """
303 """
304 the underlying revision storage object
304 the underlying revision storage object
305
305
306 A revlog consists of two parts, an index and the revision data.
306 A revlog consists of two parts, an index and the revision data.
307
307
308 The index is a file with a fixed record size containing
308 The index is a file with a fixed record size containing
309 information on each revision, including its nodeid (hash), the
309 information on each revision, including its nodeid (hash), the
310 nodeids of its parents, the position and offset of its data within
310 nodeids of its parents, the position and offset of its data within
311 the data file, and the revision it's based on. Finally, each entry
311 the data file, and the revision it's based on. Finally, each entry
312 contains a linkrev entry that can serve as a pointer to external
312 contains a linkrev entry that can serve as a pointer to external
313 data.
313 data.
314
314
315 The revision data itself is a linear collection of data chunks.
315 The revision data itself is a linear collection of data chunks.
316 Each chunk represents a revision and is usually represented as a
316 Each chunk represents a revision and is usually represented as a
317 delta against the previous chunk. To bound lookup time, runs of
317 delta against the previous chunk. To bound lookup time, runs of
318 deltas are limited to about 2 times the length of the original
318 deltas are limited to about 2 times the length of the original
319 version data. This makes retrieval of a version proportional to
319 version data. This makes retrieval of a version proportional to
320 its size, or O(1) relative to the number of revisions.
320 its size, or O(1) relative to the number of revisions.
321
321
322 Both pieces of the revlog are written to in an append-only
322 Both pieces of the revlog are written to in an append-only
323 fashion, which means we never need to rewrite a file to insert or
323 fashion, which means we never need to rewrite a file to insert or
324 remove data, and can use some simple techniques to avoid the need
324 remove data, and can use some simple techniques to avoid the need
325 for locking while reading.
325 for locking while reading.
326
326
327 If checkambig, indexfile is opened with checkambig=True at
327 If checkambig, indexfile is opened with checkambig=True at
328 writing, to avoid file stat ambiguity.
328 writing, to avoid file stat ambiguity.
329
329
330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
331 index will be mmapped rather than read if it is larger than the
331 index will be mmapped rather than read if it is larger than the
332 configured threshold.
332 configured threshold.
333
333
334 If censorable is True, the revlog can have censored revisions.
334 If censorable is True, the revlog can have censored revisions.
335 """
335 """
336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
337 mmaplargeindex=False, censorable=False):
337 mmaplargeindex=False, censorable=False):
338 """
338 """
339 create a revlog object
339 create a revlog object
340
340
341 opener is a function that abstracts the file opening operation
341 opener is a function that abstracts the file opening operation
342 and can be used to implement COW semantics or the like.
342 and can be used to implement COW semantics or the like.
343 """
343 """
344 self.indexfile = indexfile
344 self.indexfile = indexfile
345 self.datafile = datafile or (indexfile[:-2] + ".d")
345 self.datafile = datafile or (indexfile[:-2] + ".d")
346 self.opener = opener
346 self.opener = opener
347 # When True, indexfile is opened with checkambig=True at writing, to
347 # When True, indexfile is opened with checkambig=True at writing, to
348 # avoid file stat ambiguity.
348 # avoid file stat ambiguity.
349 self._checkambig = checkambig
349 self._checkambig = checkambig
350 self._censorable = censorable
350 self._censorable = censorable
351 # 3-tuple of (node, rev, text) for a raw revision.
351 # 3-tuple of (node, rev, text) for a raw revision.
352 self._revisioncache = None
352 self._revisioncache = None
353 # Maps rev to chain base rev.
353 # Maps rev to chain base rev.
354 self._chainbasecache = util.lrucachedict(100)
354 self._chainbasecache = util.lrucachedict(100)
355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
356 self._chunkcache = (0, '')
356 self._chunkcache = (0, '')
357 # How much data to read and cache into the raw revlog data cache.
357 # How much data to read and cache into the raw revlog data cache.
358 self._chunkcachesize = 65536
358 self._chunkcachesize = 65536
359 self._maxchainlen = None
359 self._maxchainlen = None
360 self._deltabothparents = True
360 self._deltabothparents = True
361 self.index = []
361 self.index = []
362 # Mapping of partial identifiers to full nodes.
362 # Mapping of partial identifiers to full nodes.
363 self._pcache = {}
363 self._pcache = {}
364 # Mapping of revision integer to full node.
364 # Mapping of revision integer to full node.
365 self._nodecache = {nullid: nullrev}
365 self._nodecache = {nullid: nullrev}
366 self._nodepos = None
366 self._nodepos = None
367 self._compengine = 'zlib'
367 self._compengine = 'zlib'
368 self._maxdeltachainspan = -1
368 self._maxdeltachainspan = -1
369 self._withsparseread = False
369 self._withsparseread = False
370 self._sparserevlog = False
370 self._sparserevlog = False
371 self._srdensitythreshold = 0.50
371 self._srdensitythreshold = 0.50
372 self._srmingapsize = 262144
372 self._srmingapsize = 262144
373
373
374 # Make copy of flag processors so each revlog instance can support
374 # Make copy of flag processors so each revlog instance can support
375 # custom flags.
375 # custom flags.
376 self._flagprocessors = dict(_flagprocessors)
376 self._flagprocessors = dict(_flagprocessors)
377
377
378 # 2-tuple of file handles being used for active writing.
378 # 2-tuple of file handles being used for active writing.
379 self._writinghandles = None
379 self._writinghandles = None
380
380
381 mmapindexthreshold = None
381 mmapindexthreshold = None
382 v = REVLOG_DEFAULT_VERSION
382 v = REVLOG_DEFAULT_VERSION
383 opts = getattr(opener, 'options', None)
383 opts = getattr(opener, 'options', None)
384 if opts is not None:
384 if opts is not None:
385 if 'revlogv2' in opts:
385 if 'revlogv2' in opts:
386 # version 2 revlogs always use generaldelta.
386 # version 2 revlogs always use generaldelta.
387 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
387 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
388 elif 'revlogv1' in opts:
388 elif 'revlogv1' in opts:
389 if 'generaldelta' in opts:
389 if 'generaldelta' in opts:
390 v |= FLAG_GENERALDELTA
390 v |= FLAG_GENERALDELTA
391 else:
391 else:
392 v = 0
392 v = 0
393 if 'chunkcachesize' in opts:
393 if 'chunkcachesize' in opts:
394 self._chunkcachesize = opts['chunkcachesize']
394 self._chunkcachesize = opts['chunkcachesize']
395 if 'maxchainlen' in opts:
395 if 'maxchainlen' in opts:
396 self._maxchainlen = opts['maxchainlen']
396 self._maxchainlen = opts['maxchainlen']
397 if 'deltabothparents' in opts:
397 if 'deltabothparents' in opts:
398 self._deltabothparents = opts['deltabothparents']
398 self._deltabothparents = opts['deltabothparents']
399 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
399 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
400 if 'compengine' in opts:
400 if 'compengine' in opts:
401 self._compengine = opts['compengine']
401 self._compengine = opts['compengine']
402 if 'maxdeltachainspan' in opts:
402 if 'maxdeltachainspan' in opts:
403 self._maxdeltachainspan = opts['maxdeltachainspan']
403 self._maxdeltachainspan = opts['maxdeltachainspan']
404 if mmaplargeindex and 'mmapindexthreshold' in opts:
404 if mmaplargeindex and 'mmapindexthreshold' in opts:
405 mmapindexthreshold = opts['mmapindexthreshold']
405 mmapindexthreshold = opts['mmapindexthreshold']
406 self._sparserevlog = bool(opts.get('sparse-revlog', False))
406 self._sparserevlog = bool(opts.get('sparse-revlog', False))
407 withsparseread = bool(opts.get('with-sparse-read', False))
407 withsparseread = bool(opts.get('with-sparse-read', False))
408 # sparse-revlog forces sparse-read
408 # sparse-revlog forces sparse-read
409 self._withsparseread = self._sparserevlog or withsparseread
409 self._withsparseread = self._sparserevlog or withsparseread
410 if 'sparse-read-density-threshold' in opts:
410 if 'sparse-read-density-threshold' in opts:
411 self._srdensitythreshold = opts['sparse-read-density-threshold']
411 self._srdensitythreshold = opts['sparse-read-density-threshold']
412 if 'sparse-read-min-gap-size' in opts:
412 if 'sparse-read-min-gap-size' in opts:
413 self._srmingapsize = opts['sparse-read-min-gap-size']
413 self._srmingapsize = opts['sparse-read-min-gap-size']
414 if opts.get('enableellipsis'):
414 if opts.get('enableellipsis'):
415 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
415 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
416
416
417 # revlog v0 doesn't have flag processors
417 # revlog v0 doesn't have flag processors
418 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
418 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
419 _insertflagprocessor(flag, processor, self._flagprocessors)
419 _insertflagprocessor(flag, processor, self._flagprocessors)
420
420
421 if self._chunkcachesize <= 0:
421 if self._chunkcachesize <= 0:
422 raise error.RevlogError(_('revlog chunk cache size %r is not '
422 raise error.RevlogError(_('revlog chunk cache size %r is not '
423 'greater than 0') % self._chunkcachesize)
423 'greater than 0') % self._chunkcachesize)
424 elif self._chunkcachesize & (self._chunkcachesize - 1):
424 elif self._chunkcachesize & (self._chunkcachesize - 1):
425 raise error.RevlogError(_('revlog chunk cache size %r is not a '
425 raise error.RevlogError(_('revlog chunk cache size %r is not a '
426 'power of 2') % self._chunkcachesize)
426 'power of 2') % self._chunkcachesize)
427
427
428 self._loadindex(v, mmapindexthreshold)
428 self._loadindex(v, mmapindexthreshold)
429
429
430 def _loadindex(self, v, mmapindexthreshold):
430 def _loadindex(self, v, mmapindexthreshold):
431 indexdata = ''
431 indexdata = ''
432 self._initempty = True
432 self._initempty = True
433 try:
433 try:
434 with self._indexfp() as f:
434 with self._indexfp() as f:
435 if (mmapindexthreshold is not None and
435 if (mmapindexthreshold is not None and
436 self.opener.fstat(f).st_size >= mmapindexthreshold):
436 self.opener.fstat(f).st_size >= mmapindexthreshold):
437 indexdata = util.buffer(util.mmapread(f))
437 indexdata = util.buffer(util.mmapread(f))
438 else:
438 else:
439 indexdata = f.read()
439 indexdata = f.read()
440 if len(indexdata) > 0:
440 if len(indexdata) > 0:
441 v = versionformat_unpack(indexdata[:4])[0]
441 v = versionformat_unpack(indexdata[:4])[0]
442 self._initempty = False
442 self._initempty = False
443 except IOError as inst:
443 except IOError as inst:
444 if inst.errno != errno.ENOENT:
444 if inst.errno != errno.ENOENT:
445 raise
445 raise
446
446
447 self.version = v
447 self.version = v
448 self._inline = v & FLAG_INLINE_DATA
448 self._inline = v & FLAG_INLINE_DATA
449 self._generaldelta = v & FLAG_GENERALDELTA
449 self._generaldelta = v & FLAG_GENERALDELTA
450 flags = v & ~0xFFFF
450 flags = v & ~0xFFFF
451 fmt = v & 0xFFFF
451 fmt = v & 0xFFFF
452 if fmt == REVLOGV0:
452 if fmt == REVLOGV0:
453 if flags:
453 if flags:
454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
455 'revlog %s') %
455 'revlog %s') %
456 (flags >> 16, fmt, self.indexfile))
456 (flags >> 16, fmt, self.indexfile))
457 elif fmt == REVLOGV1:
457 elif fmt == REVLOGV1:
458 if flags & ~REVLOGV1_FLAGS:
458 if flags & ~REVLOGV1_FLAGS:
459 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
459 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
460 'revlog %s') %
460 'revlog %s') %
461 (flags >> 16, fmt, self.indexfile))
461 (flags >> 16, fmt, self.indexfile))
462 elif fmt == REVLOGV2:
462 elif fmt == REVLOGV2:
463 if flags & ~REVLOGV2_FLAGS:
463 if flags & ~REVLOGV2_FLAGS:
464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 'revlog %s') %
465 'revlog %s') %
466 (flags >> 16, fmt, self.indexfile))
466 (flags >> 16, fmt, self.indexfile))
467 else:
467 else:
468 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
468 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
469 (fmt, self.indexfile))
469 (fmt, self.indexfile))
470
470
471 self._storedeltachains = True
471 self._storedeltachains = True
472
472
473 self._io = revlogio()
473 self._io = revlogio()
474 if self.version == REVLOGV0:
474 if self.version == REVLOGV0:
475 self._io = revlogoldio()
475 self._io = revlogoldio()
476 try:
476 try:
477 d = self._io.parseindex(indexdata, self._inline)
477 d = self._io.parseindex(indexdata, self._inline)
478 except (ValueError, IndexError):
478 except (ValueError, IndexError):
479 raise error.RevlogError(_("index %s is corrupted") %
479 raise error.RevlogError(_("index %s is corrupted") %
480 self.indexfile)
480 self.indexfile)
481 self.index, nodemap, self._chunkcache = d
481 self.index, nodemap, self._chunkcache = d
482 if nodemap is not None:
482 if nodemap is not None:
483 self.nodemap = self._nodecache = nodemap
483 self.nodemap = self._nodecache = nodemap
484 if not self._chunkcache:
484 if not self._chunkcache:
485 self._chunkclear()
485 self._chunkclear()
486 # revnum -> (chain-length, sum-delta-length)
486 # revnum -> (chain-length, sum-delta-length)
487 self._chaininfocache = {}
487 self._chaininfocache = {}
488 # revlog header -> revlog compressor
488 # revlog header -> revlog compressor
489 self._decompressors = {}
489 self._decompressors = {}
490
490
491 @util.propertycache
491 @util.propertycache
492 def _compressor(self):
492 def _compressor(self):
493 return util.compengines[self._compengine].revlogcompressor()
493 return util.compengines[self._compengine].revlogcompressor()
494
494
495 def _indexfp(self, mode='r'):
495 def _indexfp(self, mode='r'):
496 """file object for the revlog's index file"""
496 """file object for the revlog's index file"""
497 args = {r'mode': mode}
497 args = {r'mode': mode}
498 if mode != 'r':
498 if mode != 'r':
499 args[r'checkambig'] = self._checkambig
499 args[r'checkambig'] = self._checkambig
500 if mode == 'w':
500 if mode == 'w':
501 args[r'atomictemp'] = True
501 args[r'atomictemp'] = True
502 return self.opener(self.indexfile, **args)
502 return self.opener(self.indexfile, **args)
503
503
504 def _datafp(self, mode='r'):
504 def _datafp(self, mode='r'):
505 """file object for the revlog's data file"""
505 """file object for the revlog's data file"""
506 return self.opener(self.datafile, mode=mode)
506 return self.opener(self.datafile, mode=mode)
507
507
508 @contextlib.contextmanager
508 @contextlib.contextmanager
509 def _datareadfp(self, existingfp=None):
509 def _datareadfp(self, existingfp=None):
510 """file object suitable to read data"""
510 """file object suitable to read data"""
511 # Use explicit file handle, if given.
511 # Use explicit file handle, if given.
512 if existingfp is not None:
512 if existingfp is not None:
513 yield existingfp
513 yield existingfp
514
514
515 # Use a file handle being actively used for writes, if available.
515 # Use a file handle being actively used for writes, if available.
516 # There is some danger to doing this because reads will seek the
516 # There is some danger to doing this because reads will seek the
517 # file. However, _writeentry() performs a SEEK_END before all writes,
517 # file. However, _writeentry() performs a SEEK_END before all writes,
518 # so we should be safe.
518 # so we should be safe.
519 elif self._writinghandles:
519 elif self._writinghandles:
520 if self._inline:
520 if self._inline:
521 yield self._writinghandles[0]
521 yield self._writinghandles[0]
522 else:
522 else:
523 yield self._writinghandles[1]
523 yield self._writinghandles[1]
524
524
525 # Otherwise open a new file handle.
525 # Otherwise open a new file handle.
526 else:
526 else:
527 if self._inline:
527 if self._inline:
528 func = self._indexfp
528 func = self._indexfp
529 else:
529 else:
530 func = self._datafp
530 func = self._datafp
531 with func() as fp:
531 with func() as fp:
532 yield fp
532 yield fp
533
533
534 def tip(self):
534 def tip(self):
535 return self.node(len(self.index) - 1)
535 return self.node(len(self.index) - 1)
536 def __contains__(self, rev):
536 def __contains__(self, rev):
537 return 0 <= rev < len(self)
537 return 0 <= rev < len(self)
538 def __len__(self):
538 def __len__(self):
539 return len(self.index)
539 return len(self.index)
540 def __iter__(self):
540 def __iter__(self):
541 return iter(pycompat.xrange(len(self)))
541 return iter(pycompat.xrange(len(self)))
542 def revs(self, start=0, stop=None):
542 def revs(self, start=0, stop=None):
543 """iterate over all rev in this revlog (from start to stop)"""
543 """iterate over all rev in this revlog (from start to stop)"""
544 return storageutil.iterrevs(len(self), start=start, stop=stop)
544 return storageutil.iterrevs(len(self), start=start, stop=stop)
545
545
546 @util.propertycache
546 @util.propertycache
547 def nodemap(self):
547 def nodemap(self):
548 if self.index:
548 if self.index:
549 # populate mapping down to the initial node
549 # populate mapping down to the initial node
550 node0 = self.index[0][7] # get around changelog filtering
550 node0 = self.index[0][7] # get around changelog filtering
551 self.rev(node0)
551 self.rev(node0)
552 return self._nodecache
552 return self._nodecache
553
553
554 def hasnode(self, node):
554 def hasnode(self, node):
555 try:
555 try:
556 self.rev(node)
556 self.rev(node)
557 return True
557 return True
558 except KeyError:
558 except KeyError:
559 return False
559 return False
560
560
561 def candelta(self, baserev, rev):
561 def candelta(self, baserev, rev):
562 """whether two revisions (baserev, rev) can be delta-ed or not"""
562 """whether two revisions (baserev, rev) can be delta-ed or not"""
563 # Disable delta if either rev requires a content-changing flag
563 # Disable delta if either rev requires a content-changing flag
564 # processor (ex. LFS). This is because such flag processor can alter
564 # processor (ex. LFS). This is because such flag processor can alter
565 # the rawtext content that the delta will be based on, and two clients
565 # the rawtext content that the delta will be based on, and two clients
566 # could have a same revlog node with different flags (i.e. different
566 # could have a same revlog node with different flags (i.e. different
567 # rawtext contents) and the delta could be incompatible.
567 # rawtext contents) and the delta could be incompatible.
568 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
568 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
569 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
569 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
570 return False
570 return False
571 return True
571 return True
572
572
573 def clearcaches(self):
573 def clearcaches(self):
574 self._revisioncache = None
574 self._revisioncache = None
575 self._chainbasecache.clear()
575 self._chainbasecache.clear()
576 self._chunkcache = (0, '')
576 self._chunkcache = (0, '')
577 self._pcache = {}
577 self._pcache = {}
578
578
579 try:
579 try:
580 self._nodecache.clearcaches()
580 self._nodecache.clearcaches()
581 except AttributeError:
581 except AttributeError:
582 self._nodecache = {nullid: nullrev}
582 self._nodecache = {nullid: nullrev}
583 self._nodepos = None
583 self._nodepos = None
584
584
585 def rev(self, node):
585 def rev(self, node):
586 try:
586 try:
587 return self._nodecache[node]
587 return self._nodecache[node]
588 except TypeError:
588 except TypeError:
589 raise
589 raise
590 except error.RevlogError:
590 except error.RevlogError:
591 # parsers.c radix tree lookup failed
591 # parsers.c radix tree lookup failed
592 if node == wdirid or node in wdirfilenodeids:
592 if node == wdirid or node in wdirfilenodeids:
593 raise error.WdirUnsupported
593 raise error.WdirUnsupported
594 raise error.LookupError(node, self.indexfile, _('no node'))
594 raise error.LookupError(node, self.indexfile, _('no node'))
595 except KeyError:
595 except KeyError:
596 # pure python cache lookup failed
596 # pure python cache lookup failed
597 n = self._nodecache
597 n = self._nodecache
598 i = self.index
598 i = self.index
599 p = self._nodepos
599 p = self._nodepos
600 if p is None:
600 if p is None:
601 p = len(i) - 1
601 p = len(i) - 1
602 else:
602 else:
603 assert p < len(i)
603 assert p < len(i)
604 for r in pycompat.xrange(p, -1, -1):
604 for r in pycompat.xrange(p, -1, -1):
605 v = i[r][7]
605 v = i[r][7]
606 n[v] = r
606 n[v] = r
607 if v == node:
607 if v == node:
608 self._nodepos = r - 1
608 self._nodepos = r - 1
609 return r
609 return r
610 if node == wdirid or node in wdirfilenodeids:
610 if node == wdirid or node in wdirfilenodeids:
611 raise error.WdirUnsupported
611 raise error.WdirUnsupported
612 raise error.LookupError(node, self.indexfile, _('no node'))
612 raise error.LookupError(node, self.indexfile, _('no node'))
613
613
614 # Accessors for index entries.
614 # Accessors for index entries.
615
615
616 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
616 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
617 # are flags.
617 # are flags.
618 def start(self, rev):
618 def start(self, rev):
619 return int(self.index[rev][0] >> 16)
619 return int(self.index[rev][0] >> 16)
620
620
621 def flags(self, rev):
621 def flags(self, rev):
622 return self.index[rev][0] & 0xFFFF
622 return self.index[rev][0] & 0xFFFF
623
623
624 def length(self, rev):
624 def length(self, rev):
625 return self.index[rev][1]
625 return self.index[rev][1]
626
626
627 def rawsize(self, rev):
627 def rawsize(self, rev):
628 """return the length of the uncompressed text for a given revision"""
628 """return the length of the uncompressed text for a given revision"""
629 l = self.index[rev][2]
629 l = self.index[rev][2]
630 if l >= 0:
630 if l >= 0:
631 return l
631 return l
632
632
633 t = self.revision(rev, raw=True)
633 t = self.revision(rev, raw=True)
634 return len(t)
634 return len(t)
635
635
636 def size(self, rev):
636 def size(self, rev):
637 """length of non-raw text (processed by a "read" flag processor)"""
637 """length of non-raw text (processed by a "read" flag processor)"""
638 # fast path: if no "read" flag processor could change the content,
638 # fast path: if no "read" flag processor could change the content,
639 # size is rawsize. note: ELLIPSIS is known to not change the content.
639 # size is rawsize. note: ELLIPSIS is known to not change the content.
640 flags = self.flags(rev)
640 flags = self.flags(rev)
641 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
641 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
642 return self.rawsize(rev)
642 return self.rawsize(rev)
643
643
644 return len(self.revision(rev, raw=False))
644 return len(self.revision(rev, raw=False))
645
645
646 def chainbase(self, rev):
646 def chainbase(self, rev):
647 base = self._chainbasecache.get(rev)
647 base = self._chainbasecache.get(rev)
648 if base is not None:
648 if base is not None:
649 return base
649 return base
650
650
651 index = self.index
651 index = self.index
652 iterrev = rev
652 iterrev = rev
653 base = index[iterrev][3]
653 base = index[iterrev][3]
654 while base != iterrev:
654 while base != iterrev:
655 iterrev = base
655 iterrev = base
656 base = index[iterrev][3]
656 base = index[iterrev][3]
657
657
658 self._chainbasecache[rev] = base
658 self._chainbasecache[rev] = base
659 return base
659 return base
660
660
661 def linkrev(self, rev):
661 def linkrev(self, rev):
662 return self.index[rev][4]
662 return self.index[rev][4]
663
663
664 def parentrevs(self, rev):
664 def parentrevs(self, rev):
665 try:
665 try:
666 entry = self.index[rev]
666 entry = self.index[rev]
667 except IndexError:
667 except IndexError:
668 if rev == wdirrev:
668 if rev == wdirrev:
669 raise error.WdirUnsupported
669 raise error.WdirUnsupported
670 raise
670 raise
671
671
672 return entry[5], entry[6]
672 return entry[5], entry[6]
673
673
674 # fast parentrevs(rev) where rev isn't filtered
674 # fast parentrevs(rev) where rev isn't filtered
675 _uncheckedparentrevs = parentrevs
675 _uncheckedparentrevs = parentrevs
676
676
677 def node(self, rev):
677 def node(self, rev):
678 try:
678 try:
679 return self.index[rev][7]
679 return self.index[rev][7]
680 except IndexError:
680 except IndexError:
681 if rev == wdirrev:
681 if rev == wdirrev:
682 raise error.WdirUnsupported
682 raise error.WdirUnsupported
683 raise
683 raise
684
684
685 # Derived from index values.
685 # Derived from index values.
686
686
687 def end(self, rev):
687 def end(self, rev):
688 return self.start(rev) + self.length(rev)
688 return self.start(rev) + self.length(rev)
689
689
690 def parents(self, node):
690 def parents(self, node):
691 i = self.index
691 i = self.index
692 d = i[self.rev(node)]
692 d = i[self.rev(node)]
693 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
693 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
694
694
695 def chainlen(self, rev):
695 def chainlen(self, rev):
696 return self._chaininfo(rev)[0]
696 return self._chaininfo(rev)[0]
697
697
698 def _chaininfo(self, rev):
698 def _chaininfo(self, rev):
699 chaininfocache = self._chaininfocache
699 chaininfocache = self._chaininfocache
700 if rev in chaininfocache:
700 if rev in chaininfocache:
701 return chaininfocache[rev]
701 return chaininfocache[rev]
702 index = self.index
702 index = self.index
703 generaldelta = self._generaldelta
703 generaldelta = self._generaldelta
704 iterrev = rev
704 iterrev = rev
705 e = index[iterrev]
705 e = index[iterrev]
706 clen = 0
706 clen = 0
707 compresseddeltalen = 0
707 compresseddeltalen = 0
708 while iterrev != e[3]:
708 while iterrev != e[3]:
709 clen += 1
709 clen += 1
710 compresseddeltalen += e[1]
710 compresseddeltalen += e[1]
711 if generaldelta:
711 if generaldelta:
712 iterrev = e[3]
712 iterrev = e[3]
713 else:
713 else:
714 iterrev -= 1
714 iterrev -= 1
715 if iterrev in chaininfocache:
715 if iterrev in chaininfocache:
716 t = chaininfocache[iterrev]
716 t = chaininfocache[iterrev]
717 clen += t[0]
717 clen += t[0]
718 compresseddeltalen += t[1]
718 compresseddeltalen += t[1]
719 break
719 break
720 e = index[iterrev]
720 e = index[iterrev]
721 else:
721 else:
722 # Add text length of base since decompressing that also takes
722 # Add text length of base since decompressing that also takes
723 # work. For cache hits the length is already included.
723 # work. For cache hits the length is already included.
724 compresseddeltalen += e[1]
724 compresseddeltalen += e[1]
725 r = (clen, compresseddeltalen)
725 r = (clen, compresseddeltalen)
726 chaininfocache[rev] = r
726 chaininfocache[rev] = r
727 return r
727 return r
728
728
729 def _deltachain(self, rev, stoprev=None):
729 def _deltachain(self, rev, stoprev=None):
730 """Obtain the delta chain for a revision.
730 """Obtain the delta chain for a revision.
731
731
732 ``stoprev`` specifies a revision to stop at. If not specified, we
732 ``stoprev`` specifies a revision to stop at. If not specified, we
733 stop at the base of the chain.
733 stop at the base of the chain.
734
734
735 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
735 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
736 revs in ascending order and ``stopped`` is a bool indicating whether
736 revs in ascending order and ``stopped`` is a bool indicating whether
737 ``stoprev`` was hit.
737 ``stoprev`` was hit.
738 """
738 """
739 # Try C implementation.
739 # Try C implementation.
740 try:
740 try:
741 return self.index.deltachain(rev, stoprev, self._generaldelta)
741 return self.index.deltachain(rev, stoprev, self._generaldelta)
742 except AttributeError:
742 except AttributeError:
743 pass
743 pass
744
744
745 chain = []
745 chain = []
746
746
747 # Alias to prevent attribute lookup in tight loop.
747 # Alias to prevent attribute lookup in tight loop.
748 index = self.index
748 index = self.index
749 generaldelta = self._generaldelta
749 generaldelta = self._generaldelta
750
750
751 iterrev = rev
751 iterrev = rev
752 e = index[iterrev]
752 e = index[iterrev]
753 while iterrev != e[3] and iterrev != stoprev:
753 while iterrev != e[3] and iterrev != stoprev:
754 chain.append(iterrev)
754 chain.append(iterrev)
755 if generaldelta:
755 if generaldelta:
756 iterrev = e[3]
756 iterrev = e[3]
757 else:
757 else:
758 iterrev -= 1
758 iterrev -= 1
759 e = index[iterrev]
759 e = index[iterrev]
760
760
761 if iterrev == stoprev:
761 if iterrev == stoprev:
762 stopped = True
762 stopped = True
763 else:
763 else:
764 chain.append(iterrev)
764 chain.append(iterrev)
765 stopped = False
765 stopped = False
766
766
767 chain.reverse()
767 chain.reverse()
768 return chain, stopped
768 return chain, stopped
769
769
770 def ancestors(self, revs, stoprev=0, inclusive=False):
770 def ancestors(self, revs, stoprev=0, inclusive=False):
771 """Generate the ancestors of 'revs' in reverse revision order.
771 """Generate the ancestors of 'revs' in reverse revision order.
772 Does not generate revs lower than stoprev.
772 Does not generate revs lower than stoprev.
773
773
774 See the documentation for ancestor.lazyancestors for more details."""
774 See the documentation for ancestor.lazyancestors for more details."""
775
775
776 # first, make sure start revisions aren't filtered
776 # first, make sure start revisions aren't filtered
777 revs = list(revs)
777 revs = list(revs)
778 checkrev = self.node
778 checkrev = self.node
779 for r in revs:
779 for r in revs:
780 checkrev(r)
780 checkrev(r)
781 # and we're sure ancestors aren't filtered as well
781 # and we're sure ancestors aren't filtered as well
782 if util.safehasattr(parsers, 'rustlazyancestors'):
782 if util.safehasattr(parsers, 'rustlazyancestors'):
783 return ancestor.rustlazyancestors(
783 return ancestor.rustlazyancestors(
784 self.index, revs,
784 self.index, revs,
785 stoprev=stoprev, inclusive=inclusive)
785 stoprev=stoprev, inclusive=inclusive)
786 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
786 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
787 stoprev=stoprev, inclusive=inclusive)
787 stoprev=stoprev, inclusive=inclusive)
788
788
789 def descendants(self, revs):
789 def descendants(self, revs):
790 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
790 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
791
791
792 def findcommonmissing(self, common=None, heads=None):
792 def findcommonmissing(self, common=None, heads=None):
793 """Return a tuple of the ancestors of common and the ancestors of heads
793 """Return a tuple of the ancestors of common and the ancestors of heads
794 that are not ancestors of common. In revset terminology, we return the
794 that are not ancestors of common. In revset terminology, we return the
795 tuple:
795 tuple:
796
796
797 ::common, (::heads) - (::common)
797 ::common, (::heads) - (::common)
798
798
799 The list is sorted by revision number, meaning it is
799 The list is sorted by revision number, meaning it is
800 topologically sorted.
800 topologically sorted.
801
801
802 'heads' and 'common' are both lists of node IDs. If heads is
802 'heads' and 'common' are both lists of node IDs. If heads is
803 not supplied, uses all of the revlog's heads. If common is not
803 not supplied, uses all of the revlog's heads. If common is not
804 supplied, uses nullid."""
804 supplied, uses nullid."""
805 if common is None:
805 if common is None:
806 common = [nullid]
806 common = [nullid]
807 if heads is None:
807 if heads is None:
808 heads = self.heads()
808 heads = self.heads()
809
809
810 common = [self.rev(n) for n in common]
810 common = [self.rev(n) for n in common]
811 heads = [self.rev(n) for n in heads]
811 heads = [self.rev(n) for n in heads]
812
812
813 # we want the ancestors, but inclusive
813 # we want the ancestors, but inclusive
814 class lazyset(object):
814 class lazyset(object):
815 def __init__(self, lazyvalues):
815 def __init__(self, lazyvalues):
816 self.addedvalues = set()
816 self.addedvalues = set()
817 self.lazyvalues = lazyvalues
817 self.lazyvalues = lazyvalues
818
818
819 def __contains__(self, value):
819 def __contains__(self, value):
820 return value in self.addedvalues or value in self.lazyvalues
820 return value in self.addedvalues or value in self.lazyvalues
821
821
822 def __iter__(self):
822 def __iter__(self):
823 added = self.addedvalues
823 added = self.addedvalues
824 for r in added:
824 for r in added:
825 yield r
825 yield r
826 for r in self.lazyvalues:
826 for r in self.lazyvalues:
827 if not r in added:
827 if not r in added:
828 yield r
828 yield r
829
829
830 def add(self, value):
830 def add(self, value):
831 self.addedvalues.add(value)
831 self.addedvalues.add(value)
832
832
833 def update(self, values):
833 def update(self, values):
834 self.addedvalues.update(values)
834 self.addedvalues.update(values)
835
835
836 has = lazyset(self.ancestors(common))
836 has = lazyset(self.ancestors(common))
837 has.add(nullrev)
837 has.add(nullrev)
838 has.update(common)
838 has.update(common)
839
839
840 # take all ancestors from heads that aren't in has
840 # take all ancestors from heads that aren't in has
841 missing = set()
841 missing = set()
842 visit = collections.deque(r for r in heads if r not in has)
842 visit = collections.deque(r for r in heads if r not in has)
843 while visit:
843 while visit:
844 r = visit.popleft()
844 r = visit.popleft()
845 if r in missing:
845 if r in missing:
846 continue
846 continue
847 else:
847 else:
848 missing.add(r)
848 missing.add(r)
849 for p in self.parentrevs(r):
849 for p in self.parentrevs(r):
850 if p not in has:
850 if p not in has:
851 visit.append(p)
851 visit.append(p)
852 missing = list(missing)
852 missing = list(missing)
853 missing.sort()
853 missing.sort()
854 return has, [self.node(miss) for miss in missing]
854 return has, [self.node(miss) for miss in missing]
855
855
856 def incrementalmissingrevs(self, common=None):
856 def incrementalmissingrevs(self, common=None):
857 """Return an object that can be used to incrementally compute the
857 """Return an object that can be used to incrementally compute the
858 revision numbers of the ancestors of arbitrary sets that are not
858 revision numbers of the ancestors of arbitrary sets that are not
859 ancestors of common. This is an ancestor.incrementalmissingancestors
859 ancestors of common. This is an ancestor.incrementalmissingancestors
860 object.
860 object.
861
861
862 'common' is a list of revision numbers. If common is not supplied, uses
862 'common' is a list of revision numbers. If common is not supplied, uses
863 nullrev.
863 nullrev.
864 """
864 """
865 if common is None:
865 if common is None:
866 common = [nullrev]
866 common = [nullrev]
867
867
868 return ancestor.incrementalmissingancestors(self.parentrevs, common)
868 return ancestor.incrementalmissingancestors(self.parentrevs, common)
869
869
870 def findmissingrevs(self, common=None, heads=None):
870 def findmissingrevs(self, common=None, heads=None):
871 """Return the revision numbers of the ancestors of heads that
871 """Return the revision numbers of the ancestors of heads that
872 are not ancestors of common.
872 are not ancestors of common.
873
873
874 More specifically, return a list of revision numbers corresponding to
874 More specifically, return a list of revision numbers corresponding to
875 nodes N such that every N satisfies the following constraints:
875 nodes N such that every N satisfies the following constraints:
876
876
877 1. N is an ancestor of some node in 'heads'
877 1. N is an ancestor of some node in 'heads'
878 2. N is not an ancestor of any node in 'common'
878 2. N is not an ancestor of any node in 'common'
879
879
880 The list is sorted by revision number, meaning it is
880 The list is sorted by revision number, meaning it is
881 topologically sorted.
881 topologically sorted.
882
882
883 'heads' and 'common' are both lists of revision numbers. If heads is
883 'heads' and 'common' are both lists of revision numbers. If heads is
884 not supplied, uses all of the revlog's heads. If common is not
884 not supplied, uses all of the revlog's heads. If common is not
885 supplied, uses nullid."""
885 supplied, uses nullid."""
886 if common is None:
886 if common is None:
887 common = [nullrev]
887 common = [nullrev]
888 if heads is None:
888 if heads is None:
889 heads = self.headrevs()
889 heads = self.headrevs()
890
890
891 inc = self.incrementalmissingrevs(common=common)
891 inc = self.incrementalmissingrevs(common=common)
892 return inc.missingancestors(heads)
892 return inc.missingancestors(heads)
893
893
894 def findmissing(self, common=None, heads=None):
894 def findmissing(self, common=None, heads=None):
895 """Return the ancestors of heads that are not ancestors of common.
895 """Return the ancestors of heads that are not ancestors of common.
896
896
897 More specifically, return a list of nodes N such that every N
897 More specifically, return a list of nodes N such that every N
898 satisfies the following constraints:
898 satisfies the following constraints:
899
899
900 1. N is an ancestor of some node in 'heads'
900 1. N is an ancestor of some node in 'heads'
901 2. N is not an ancestor of any node in 'common'
901 2. N is not an ancestor of any node in 'common'
902
902
903 The list is sorted by revision number, meaning it is
903 The list is sorted by revision number, meaning it is
904 topologically sorted.
904 topologically sorted.
905
905
906 'heads' and 'common' are both lists of node IDs. If heads is
906 'heads' and 'common' are both lists of node IDs. If heads is
907 not supplied, uses all of the revlog's heads. If common is not
907 not supplied, uses all of the revlog's heads. If common is not
908 supplied, uses nullid."""
908 supplied, uses nullid."""
909 if common is None:
909 if common is None:
910 common = [nullid]
910 common = [nullid]
911 if heads is None:
911 if heads is None:
912 heads = self.heads()
912 heads = self.heads()
913
913
914 common = [self.rev(n) for n in common]
914 common = [self.rev(n) for n in common]
915 heads = [self.rev(n) for n in heads]
915 heads = [self.rev(n) for n in heads]
916
916
917 inc = self.incrementalmissingrevs(common=common)
917 inc = self.incrementalmissingrevs(common=common)
918 return [self.node(r) for r in inc.missingancestors(heads)]
918 return [self.node(r) for r in inc.missingancestors(heads)]
919
919
920 def nodesbetween(self, roots=None, heads=None):
920 def nodesbetween(self, roots=None, heads=None):
921 """Return a topological path from 'roots' to 'heads'.
921 """Return a topological path from 'roots' to 'heads'.
922
922
923 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
923 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
924 topologically sorted list of all nodes N that satisfy both of
924 topologically sorted list of all nodes N that satisfy both of
925 these constraints:
925 these constraints:
926
926
927 1. N is a descendant of some node in 'roots'
927 1. N is a descendant of some node in 'roots'
928 2. N is an ancestor of some node in 'heads'
928 2. N is an ancestor of some node in 'heads'
929
929
930 Every node is considered to be both a descendant and an ancestor
930 Every node is considered to be both a descendant and an ancestor
931 of itself, so every reachable node in 'roots' and 'heads' will be
931 of itself, so every reachable node in 'roots' and 'heads' will be
932 included in 'nodes'.
932 included in 'nodes'.
933
933
934 'outroots' is the list of reachable nodes in 'roots', i.e., the
934 'outroots' is the list of reachable nodes in 'roots', i.e., the
935 subset of 'roots' that is returned in 'nodes'. Likewise,
935 subset of 'roots' that is returned in 'nodes'. Likewise,
936 'outheads' is the subset of 'heads' that is also in 'nodes'.
936 'outheads' is the subset of 'heads' that is also in 'nodes'.
937
937
938 'roots' and 'heads' are both lists of node IDs. If 'roots' is
938 'roots' and 'heads' are both lists of node IDs. If 'roots' is
939 unspecified, uses nullid as the only root. If 'heads' is
939 unspecified, uses nullid as the only root. If 'heads' is
940 unspecified, uses list of all of the revlog's heads."""
940 unspecified, uses list of all of the revlog's heads."""
941 nonodes = ([], [], [])
941 nonodes = ([], [], [])
942 if roots is not None:
942 if roots is not None:
943 roots = list(roots)
943 roots = list(roots)
944 if not roots:
944 if not roots:
945 return nonodes
945 return nonodes
946 lowestrev = min([self.rev(n) for n in roots])
946 lowestrev = min([self.rev(n) for n in roots])
947 else:
947 else:
948 roots = [nullid] # Everybody's a descendant of nullid
948 roots = [nullid] # Everybody's a descendant of nullid
949 lowestrev = nullrev
949 lowestrev = nullrev
950 if (lowestrev == nullrev) and (heads is None):
950 if (lowestrev == nullrev) and (heads is None):
951 # We want _all_ the nodes!
951 # We want _all_ the nodes!
952 return ([self.node(r) for r in self], [nullid], list(self.heads()))
952 return ([self.node(r) for r in self], [nullid], list(self.heads()))
953 if heads is None:
953 if heads is None:
954 # All nodes are ancestors, so the latest ancestor is the last
954 # All nodes are ancestors, so the latest ancestor is the last
955 # node.
955 # node.
956 highestrev = len(self) - 1
956 highestrev = len(self) - 1
957 # Set ancestors to None to signal that every node is an ancestor.
957 # Set ancestors to None to signal that every node is an ancestor.
958 ancestors = None
958 ancestors = None
959 # Set heads to an empty dictionary for later discovery of heads
959 # Set heads to an empty dictionary for later discovery of heads
960 heads = {}
960 heads = {}
961 else:
961 else:
962 heads = list(heads)
962 heads = list(heads)
963 if not heads:
963 if not heads:
964 return nonodes
964 return nonodes
965 ancestors = set()
965 ancestors = set()
966 # Turn heads into a dictionary so we can remove 'fake' heads.
966 # Turn heads into a dictionary so we can remove 'fake' heads.
967 # Also, later we will be using it to filter out the heads we can't
967 # Also, later we will be using it to filter out the heads we can't
968 # find from roots.
968 # find from roots.
969 heads = dict.fromkeys(heads, False)
969 heads = dict.fromkeys(heads, False)
970 # Start at the top and keep marking parents until we're done.
970 # Start at the top and keep marking parents until we're done.
971 nodestotag = set(heads)
971 nodestotag = set(heads)
972 # Remember where the top was so we can use it as a limit later.
972 # Remember where the top was so we can use it as a limit later.
973 highestrev = max([self.rev(n) for n in nodestotag])
973 highestrev = max([self.rev(n) for n in nodestotag])
974 while nodestotag:
974 while nodestotag:
975 # grab a node to tag
975 # grab a node to tag
976 n = nodestotag.pop()
976 n = nodestotag.pop()
977 # Never tag nullid
977 # Never tag nullid
978 if n == nullid:
978 if n == nullid:
979 continue
979 continue
980 # A node's revision number represents its place in a
980 # A node's revision number represents its place in a
981 # topologically sorted list of nodes.
981 # topologically sorted list of nodes.
982 r = self.rev(n)
982 r = self.rev(n)
983 if r >= lowestrev:
983 if r >= lowestrev:
984 if n not in ancestors:
984 if n not in ancestors:
985 # If we are possibly a descendant of one of the roots
985 # If we are possibly a descendant of one of the roots
986 # and we haven't already been marked as an ancestor
986 # and we haven't already been marked as an ancestor
987 ancestors.add(n) # Mark as ancestor
987 ancestors.add(n) # Mark as ancestor
988 # Add non-nullid parents to list of nodes to tag.
988 # Add non-nullid parents to list of nodes to tag.
989 nodestotag.update([p for p in self.parents(n) if
989 nodestotag.update([p for p in self.parents(n) if
990 p != nullid])
990 p != nullid])
991 elif n in heads: # We've seen it before, is it a fake head?
991 elif n in heads: # We've seen it before, is it a fake head?
992 # So it is, real heads should not be the ancestors of
992 # So it is, real heads should not be the ancestors of
993 # any other heads.
993 # any other heads.
994 heads.pop(n)
994 heads.pop(n)
995 if not ancestors:
995 if not ancestors:
996 return nonodes
996 return nonodes
997 # Now that we have our set of ancestors, we want to remove any
997 # Now that we have our set of ancestors, we want to remove any
998 # roots that are not ancestors.
998 # roots that are not ancestors.
999
999
1000 # If one of the roots was nullid, everything is included anyway.
1000 # If one of the roots was nullid, everything is included anyway.
1001 if lowestrev > nullrev:
1001 if lowestrev > nullrev:
1002 # But, since we weren't, let's recompute the lowest rev to not
1002 # But, since we weren't, let's recompute the lowest rev to not
1003 # include roots that aren't ancestors.
1003 # include roots that aren't ancestors.
1004
1004
1005 # Filter out roots that aren't ancestors of heads
1005 # Filter out roots that aren't ancestors of heads
1006 roots = [root for root in roots if root in ancestors]
1006 roots = [root for root in roots if root in ancestors]
1007 # Recompute the lowest revision
1007 # Recompute the lowest revision
1008 if roots:
1008 if roots:
1009 lowestrev = min([self.rev(root) for root in roots])
1009 lowestrev = min([self.rev(root) for root in roots])
1010 else:
1010 else:
1011 # No more roots? Return empty list
1011 # No more roots? Return empty list
1012 return nonodes
1012 return nonodes
1013 else:
1013 else:
1014 # We are descending from nullid, and don't need to care about
1014 # We are descending from nullid, and don't need to care about
1015 # any other roots.
1015 # any other roots.
1016 lowestrev = nullrev
1016 lowestrev = nullrev
1017 roots = [nullid]
1017 roots = [nullid]
1018 # Transform our roots list into a set.
1018 # Transform our roots list into a set.
1019 descendants = set(roots)
1019 descendants = set(roots)
1020 # Also, keep the original roots so we can filter out roots that aren't
1020 # Also, keep the original roots so we can filter out roots that aren't
1021 # 'real' roots (i.e. are descended from other roots).
1021 # 'real' roots (i.e. are descended from other roots).
1022 roots = descendants.copy()
1022 roots = descendants.copy()
1023 # Our topologically sorted list of output nodes.
1023 # Our topologically sorted list of output nodes.
1024 orderedout = []
1024 orderedout = []
1025 # Don't start at nullid since we don't want nullid in our output list,
1025 # Don't start at nullid since we don't want nullid in our output list,
1026 # and if nullid shows up in descendants, empty parents will look like
1026 # and if nullid shows up in descendants, empty parents will look like
1027 # they're descendants.
1027 # they're descendants.
1028 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1028 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1029 n = self.node(r)
1029 n = self.node(r)
1030 isdescendant = False
1030 isdescendant = False
1031 if lowestrev == nullrev: # Everybody is a descendant of nullid
1031 if lowestrev == nullrev: # Everybody is a descendant of nullid
1032 isdescendant = True
1032 isdescendant = True
1033 elif n in descendants:
1033 elif n in descendants:
1034 # n is already a descendant
1034 # n is already a descendant
1035 isdescendant = True
1035 isdescendant = True
1036 # This check only needs to be done here because all the roots
1036 # This check only needs to be done here because all the roots
1037 # will start being marked is descendants before the loop.
1037 # will start being marked is descendants before the loop.
1038 if n in roots:
1038 if n in roots:
1039 # If n was a root, check if it's a 'real' root.
1039 # If n was a root, check if it's a 'real' root.
1040 p = tuple(self.parents(n))
1040 p = tuple(self.parents(n))
1041 # If any of its parents are descendants, it's not a root.
1041 # If any of its parents are descendants, it's not a root.
1042 if (p[0] in descendants) or (p[1] in descendants):
1042 if (p[0] in descendants) or (p[1] in descendants):
1043 roots.remove(n)
1043 roots.remove(n)
1044 else:
1044 else:
1045 p = tuple(self.parents(n))
1045 p = tuple(self.parents(n))
1046 # A node is a descendant if either of its parents are
1046 # A node is a descendant if either of its parents are
1047 # descendants. (We seeded the dependents list with the roots
1047 # descendants. (We seeded the dependents list with the roots
1048 # up there, remember?)
1048 # up there, remember?)
1049 if (p[0] in descendants) or (p[1] in descendants):
1049 if (p[0] in descendants) or (p[1] in descendants):
1050 descendants.add(n)
1050 descendants.add(n)
1051 isdescendant = True
1051 isdescendant = True
1052 if isdescendant and ((ancestors is None) or (n in ancestors)):
1052 if isdescendant and ((ancestors is None) or (n in ancestors)):
1053 # Only include nodes that are both descendants and ancestors.
1053 # Only include nodes that are both descendants and ancestors.
1054 orderedout.append(n)
1054 orderedout.append(n)
1055 if (ancestors is not None) and (n in heads):
1055 if (ancestors is not None) and (n in heads):
1056 # We're trying to figure out which heads are reachable
1056 # We're trying to figure out which heads are reachable
1057 # from roots.
1057 # from roots.
1058 # Mark this head as having been reached
1058 # Mark this head as having been reached
1059 heads[n] = True
1059 heads[n] = True
1060 elif ancestors is None:
1060 elif ancestors is None:
1061 # Otherwise, we're trying to discover the heads.
1061 # Otherwise, we're trying to discover the heads.
1062 # Assume this is a head because if it isn't, the next step
1062 # Assume this is a head because if it isn't, the next step
1063 # will eventually remove it.
1063 # will eventually remove it.
1064 heads[n] = True
1064 heads[n] = True
1065 # But, obviously its parents aren't.
1065 # But, obviously its parents aren't.
1066 for p in self.parents(n):
1066 for p in self.parents(n):
1067 heads.pop(p, None)
1067 heads.pop(p, None)
1068 heads = [head for head, flag in heads.iteritems() if flag]
1068 heads = [head for head, flag in heads.iteritems() if flag]
1069 roots = list(roots)
1069 roots = list(roots)
1070 assert orderedout
1070 assert orderedout
1071 assert roots
1071 assert roots
1072 assert heads
1072 assert heads
1073 return (orderedout, roots, heads)
1073 return (orderedout, roots, heads)
1074
1074
def headrevs(self):
    """Return the revision numbers of all head revisions.

    The C index exposes a fast native ``headrevs`` implementation;
    when the index is the pure Python one (no such attribute), fall
    back to the slower ``_headrevs``.
    """
    fast = getattr(self.index, 'headrevs', None)
    if fast is None:
        # pure python index: compute heads manually
        return self._headrevs()
    return fast()
1080
1080
def computephases(self, roots):
    """Delegate phase map/set computation for *roots* to the C index."""
    index = self.index
    return index.computephasesmapsets(roots)
1083
1083
1084 def _headrevs(self):
1084 def _headrevs(self):
1085 count = len(self)
1085 count = len(self)
1086 if not count:
1086 if not count:
1087 return [nullrev]
1087 return [nullrev]
1088 # we won't iter over filtered rev so nobody is a head at start
1088 # we won't iter over filtered rev so nobody is a head at start
1089 ishead = [0] * (count + 1)
1089 ishead = [0] * (count + 1)
1090 index = self.index
1090 index = self.index
1091 for r in self:
1091 for r in self:
1092 ishead[r] = 1 # I may be an head
1092 ishead[r] = 1 # I may be an head
1093 e = index[r]
1093 e = index[r]
1094 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1094 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1095 return [r for r, val in enumerate(ishead) if val]
1095 return [r for r, val in enumerate(ishead) if val]
1096
1096
def heads(self, start=None, stop=None):
    """Return the list of all nodes that have no children.

    When ``start`` is given, only heads that are descendants of
    ``start`` are returned.  When ``stop`` is given, every revision in
    it is considered as if it had no children.
    """
    if start is None and stop is None:
        # fast path: heads of the whole revlog
        if not len(self):
            return [nullid]
        return [self.node(r) for r in self.headrevs()]

    startrev = nullrev if start is None else self.rev(start)
    stoprevs = set(self.rev(n) for n in stop or [])

    revs = dagop.headrevssubset(self.revs, self.parentrevs,
                                startrev=startrev, stoprevs=stoprevs)

    return [self.node(rev) for rev in revs]
1121
1121
def children(self, node):
    """Return the list of child nodes of *node*."""
    out = []
    target = self.rev(node)
    # children can only appear after their parent in the revlog
    for rev in self.revs(start=target + 1):
        parents = [p for p in self.parentrevs(rev) if p != nullrev]
        if not parents:
            # parentless revision: a child of the null revision only
            if target == nullrev:
                out.append(self.node(rev))
            continue
        for p in parents:
            if p == target:
                out.append(self.node(rev))
    return out
1135
1135
def commonancestorsheads(self, a, b):
    """Return all heads of the common ancestors of nodes *a* and *b*."""
    reva, revb = self.rev(a), self.rev(b)
    ancrevs = self._commonancestorsheads(reva, revb)
    # translate back from revision numbers to nodes
    return pycompat.maplist(self.node, ancrevs)
1141
1141
1142 def _commonancestorsheads(self, *revs):
1142 def _commonancestorsheads(self, *revs):
1143 """calculate all the heads of the common ancestors of revs"""
1143 """calculate all the heads of the common ancestors of revs"""
1144 try:
1144 try:
1145 ancs = self.index.commonancestorsheads(*revs)
1145 ancs = self.index.commonancestorsheads(*revs)
1146 except (AttributeError, OverflowError): # C implementation failed
1146 except (AttributeError, OverflowError): # C implementation failed
1147 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1147 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1148 return ancs
1148 return ancs
1149
1149
def isancestor(self, a, b):
    """Return True if node *a* is an ancestor of node *b*.

    A revision is considered an ancestor of itself.
    """
    # resolve both nodes to revision numbers, then defer to the
    # revision-based implementation
    return self.isancestorrev(self.rev(a), self.rev(b))
1156
1156
def isancestorrev(self, a, b):
    """Return True if revision *a* is an ancestor of revision *b*.

    A revision is considered an ancestor of itself.

    The implementation of this is trivial but the use of
    commonancestorsheads is not.
    """
    if a == nullrev:
        # the null revision is an ancestor of everything
        return True
    if a == b:
        return True
    if a > b:
        # an ancestor always has a smaller revision number
        return False
    return a in self._commonancestorsheads(a, b)
1171
1171
def ancestor(self, a, b):
    """Calculate the "best" common ancestor of nodes *a* and *b*."""
    reva, revb = self.rev(a), self.rev(b)
    try:
        ancs = self.index.ancestors(reva, revb)
    except (AttributeError, OverflowError):
        # pure python index, or revision numbers too wide for C code
        ancs = ancestor.ancestors(self.parentrevs, reva, revb)
    if not ancs:
        return nullid
    # several candidates may tie; pick a deterministic winner
    return min(map(self.node, ancs))
1184
1184
1185 def _match(self, id):
1185 def _match(self, id):
1186 if isinstance(id, int):
1186 if isinstance(id, int):
1187 # rev
1187 # rev
1188 return self.node(id)
1188 return self.node(id)
1189 if len(id) == 20:
1189 if len(id) == 20:
1190 # possibly a binary node
1190 # possibly a binary node
1191 # odds of a binary node being all hex in ASCII are 1 in 10**25
1191 # odds of a binary node being all hex in ASCII are 1 in 10**25
1192 try:
1192 try:
1193 node = id
1193 node = id
1194 self.rev(node) # quick search the index
1194 self.rev(node) # quick search the index
1195 return node
1195 return node
1196 except error.LookupError:
1196 except error.LookupError:
1197 pass # may be partial hex id
1197 pass # may be partial hex id
1198 try:
1198 try:
1199 # str(rev)
1199 # str(rev)
1200 rev = int(id)
1200 rev = int(id)
1201 if "%d" % rev != id:
1201 if "%d" % rev != id:
1202 raise ValueError
1202 raise ValueError
1203 if rev < 0:
1203 if rev < 0:
1204 rev = len(self) + rev
1204 rev = len(self) + rev
1205 if rev < 0 or rev >= len(self):
1205 if rev < 0 or rev >= len(self):
1206 raise ValueError
1206 raise ValueError
1207 return self.node(rev)
1207 return self.node(rev)
1208 except (ValueError, OverflowError):
1208 except (ValueError, OverflowError):
1209 pass
1209 pass
1210 if len(id) == 40:
1210 if len(id) == 40:
1211 try:
1211 try:
1212 # a full hex nodeid?
1212 # a full hex nodeid?
1213 node = bin(id)
1213 node = bin(id)
1214 self.rev(node)
1214 self.rev(node)
1215 return node
1215 return node
1216 except (TypeError, error.LookupError):
1216 except (TypeError, error.LookupError):
1217 pass
1217 pass
1218
1218
def _partialmatch(self, id):
    """Resolve a (possibly abbreviated) hex nodeid prefix to a node.

    Returns the binary node on an unambiguous match and None when
    nothing matches.  Raises AmbiguousPrefixLookupError when several
    nodes share the prefix, and WdirUnsupported when the prefix can
    only denote the working directory pseudo-node.
    """
    # we don't care wdirfilenodeids as they should be always full hash
    maybewdir = wdirhex.startswith(id)
    try:
        # fast path: the C radix tree in parsers.c
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                raise error.RevlogError
            return partial
        if maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        return None
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            raise error.AmbiguousPrefixLookupError(
                id, self.indexfile, _('ambiguous identifier'))
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass

    # slow path below; consult the prefix cache first
    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            l = len(id) // 2  # grab an even number of digits
            prefix = bin(id[:l * 2])
            # linear scan of the index for nodes sharing the binary prefix
            nl = [e[7] for e in self.index if e[7].startswith(prefix)]
            # then narrow by the full (possibly odd-length) hex prefix
            nl = [n for n in nl if hex(n).startswith(id) and
                  self.hasnode(n)]
            if nullhex.startswith(id):
                nl.append(nullid)
            if len(nl) > 0:
                if len(nl) == 1 and not maybewdir:
                    self._pcache[id] = nl[0]
                    return nl[0]
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            # bin() rejected a non-hex prefix
            pass
1268
1268
def lookup(self, id):
    """Locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
    """
    # exact forms first (rev number, binary node, full hex)
    resolved = self._match(id)
    if resolved is not None:
        return resolved
    # then try a unique hex prefix
    resolved = self._partialmatch(id)
    if resolved:
        return resolved

    raise error.LookupError(id, self.indexfile, _('no match found'))
1282
1282
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node."""
    def isvalid(prefix):
        # a prefix is valid when it resolves to exactly one node
        try:
            node = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if node is None:
            raise error.LookupError(node, self.indexfile, _('no node'))
        return True

    def maybewdir(prefix):
        # all-'f' prefixes could also denote the wdir pseudo-node
        return all(c == 'f' for c in prefix)

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    # fast path: the C index can compute the shortest unambiguous
    # length directly when no revisions are filtered
    if not getattr(self, 'filteredrevs', None):
        try:
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except error.RevlogError:
            # NOTE(review): presumably raised when the node is unknown
            # to the index — wdirid is the one legitimate such case
            if node != wdirid:
                raise error.LookupError(node, self.indexfile, _('no node'))
        except AttributeError:
            # Fall through to pure code
            pass

    if node == wdirid:
        # wdirid itself is not in the index; probe prefixes directly
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    # slow path: grow the prefix until it resolves uniquely
    for length in range(minlength, 41):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)
1330
1330
def cmp(self, node, text):
    """Compare *text* with the stored revision *node*.

    Returns True if *text* is different than what is stored, i.e. the
    hash computed over *text* and the parents does not match the node.
    """
    parent1, parent2 = self.parents(node)
    computed = storageutil.hashrevisionsha1(text, parent1, parent2)
    return computed != node
1338
1338
1339 def _cachesegment(self, offset, data):
1339 def _cachesegment(self, offset, data):
1340 """Add a segment to the revlog cache.
1340 """Add a segment to the revlog cache.
1341
1341
1342 Accepts an absolute offset and the data that is at that location.
1342 Accepts an absolute offset and the data that is at that location.
1343 """
1343 """
1344 o, d = self._chunkcache
1344 o, d = self._chunkcache
1345 # try to add to existing cache
1345 # try to add to existing cache
1346 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1346 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1347 self._chunkcache = o, d + data
1347 self._chunkcache = o, d + data
1348 else:
1348 else:
1349 self._chunkcache = offset, data
1349 self._chunkcache = offset, data
1350
1350
def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, length to read, and an optional existing
    file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    Returns a str or buffer of raw byte data.

    Raises if the requested number of bytes could not be read.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    cachesize = self._chunkcachesize
    # assumes cachesize is a power of two (TODO confirm it is enforced
    # at configuration time): the mask rounds the request down/up to
    # cache-window boundaries
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        d = df.read(reallength)

    self._cachesegment(realoffset, d)
    if offset != realoffset or reallength != length:
        # the aligned read over-fetched; slice the requested window out
        startoffset = offset - realoffset
        if len(d) - startoffset < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from '
                  'offset %d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, realoffset, len(d) - startoffset))

        # zero-copy view into the cached data
        return util.buffer(d, startoffset, length)

    if len(d) < length:
        raise error.RevlogError(
            _('partial read of revlog %s; expected %d bytes from offset '
              '%d, got %d') %
            (self.indexfile if self._inline else self.datafile,
             length, offset, len(d)))

    return d
1395
1395
1396 def _getsegment(self, offset, length, df=None):
1396 def _getsegment(self, offset, length, df=None):
1397 """Obtain a segment of raw data from the revlog.
1397 """Obtain a segment of raw data from the revlog.
1398
1398
1399 Accepts an absolute offset, length of bytes to obtain, and an
1399 Accepts an absolute offset, length of bytes to obtain, and an
1400 optional file handle to the already-opened revlog. If the file
1400 optional file handle to the already-opened revlog. If the file
1401 handle is used, it's original seek position will not be preserved.
1401 handle is used, it's original seek position will not be preserved.
1402
1402
1403 Requests for data may be returned from a cache.
1403 Requests for data may be returned from a cache.
1404
1404
1405 Returns a str or a buffer instance of raw byte data.
1405 Returns a str or a buffer instance of raw byte data.
1406 """
1406 """
1407 o, d = self._chunkcache
1407 o, d = self._chunkcache
1408 l = len(d)
1408 l = len(d)
1409
1409
1410 # is it in the cache?
1410 # is it in the cache?
1411 cachestart = offset - o
1411 cachestart = offset - o
1412 cacheend = cachestart + length
1412 cacheend = cachestart + length
1413 if cachestart >= 0 and cacheend <= l:
1413 if cachestart >= 0 and cacheend <= l:
1414 if cachestart == 0 and cacheend == l:
1414 if cachestart == 0 and cacheend == l:
1415 return d # avoid a copy
1415 return d # avoid a copy
1416 return util.buffer(d, cachestart, cacheend - cachestart)
1416 return util.buffer(d, cachestart, cacheend - cachestart)
1417
1417
1418 return self._readsegment(offset, length, df=df)
1418 return self._readsegment(offset, length, df=df)
1419
1419
1420 def _getsegmentforrevs(self, startrev, endrev, df=None):
1420 def _getsegmentforrevs(self, startrev, endrev, df=None):
1421 """Obtain a segment of raw data corresponding to a range of revisions.
1421 """Obtain a segment of raw data corresponding to a range of revisions.
1422
1422
1423 Accepts the start and end revisions and an optional already-open
1423 Accepts the start and end revisions and an optional already-open
1424 file handle to be used for reading. If the file handle is read, its
1424 file handle to be used for reading. If the file handle is read, its
1425 seek position will not be preserved.
1425 seek position will not be preserved.
1426
1426
1427 Requests for data may be satisfied by a cache.
1427 Requests for data may be satisfied by a cache.
1428
1428
1429 Returns a 2-tuple of (offset, data) for the requested range of
1429 Returns a 2-tuple of (offset, data) for the requested range of
1430 revisions. Offset is the integer offset from the beginning of the
1430 revisions. Offset is the integer offset from the beginning of the
1431 revlog and data is a str or buffer of the raw byte data.
1431 revlog and data is a str or buffer of the raw byte data.
1432
1432
1433 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1433 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1434 to determine where each revision's data begins and ends.
1434 to determine where each revision's data begins and ends.
1435 """
1435 """
1436 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1436 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1437 # (functions are expensive).
1437 # (functions are expensive).
1438 index = self.index
1438 index = self.index
1439 istart = index[startrev]
1439 istart = index[startrev]
1440 start = int(istart[0] >> 16)
1440 start = int(istart[0] >> 16)
1441 if startrev == endrev:
1441 if startrev == endrev:
1442 end = start + istart[1]
1442 end = start + istart[1]
1443 else:
1443 else:
1444 iend = index[endrev]
1444 iend = index[endrev]
1445 end = int(iend[0] >> 16) + iend[1]
1445 end = int(iend[0] >> 16) + iend[1]
1446
1446
1447 if self._inline:
1447 if self._inline:
1448 start += (startrev + 1) * self._io.size
1448 start += (startrev + 1) * self._io.size
1449 end += (endrev + 1) * self._io.size
1449 end += (endrev + 1) * self._io.size
1450 length = end - start
1450 length = end - start
1451
1451
1452 return start, self._getsegment(start, length, df=df)
1452 return start, self._getsegment(start, length, df=df)
1453
1453
1454 def _chunk(self, rev, df=None):
1454 def _chunk(self, rev, df=None):
1455 """Obtain a single decompressed chunk for a revision.
1455 """Obtain a single decompressed chunk for a revision.
1456
1456
1457 Accepts an integer revision and an optional already-open file handle
1457 Accepts an integer revision and an optional already-open file handle
1458 to be used for reading. If used, the seek position of the file will not
1458 to be used for reading. If used, the seek position of the file will not
1459 be preserved.
1459 be preserved.
1460
1460
1461 Returns a str holding uncompressed data for the requested revision.
1461 Returns a str holding uncompressed data for the requested revision.
1462 """
1462 """
1463 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1463 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1464
1464
def _chunks(self, revs, df=None, targetsize=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.
    """
    if not revs:
        return []
    # hoist attribute/method lookups out of the hot loop below
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self._io.size
    buffer = util.buffer

    l = []
    ladd = l.append

    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = deltautil.slicechunk(self, revs,
                                            targetsize=targetsize)

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        # (if every diff is empty, lastrev ends up as the first rev)
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            # NOTE(review): this returns only the current slice's
            # chunks, discarding anything already accumulated in l from
            # earlier slices — verify this path is only reachable when
            # there is a single slice
            return [self._chunk(rev, df=df) for rev in revschunk]

        decomp = self.decompress
        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                # skip the interleaved index records of an inline revlog
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            # zero-copy slice of the shared segment, then decompress
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

    return l
1518
1518
1519 def _chunkclear(self):
1519 def _chunkclear(self):
1520 """Clear the raw chunk cache."""
1520 """Clear the raw chunk cache."""
1521 self._chunkcache = (0, '')
1521 self._chunkcache = (0, '')
1522
1522
1523 def deltaparent(self, rev):
1523 def deltaparent(self, rev):
1524 """return deltaparent of the given revision"""
1524 """return deltaparent of the given revision"""
1525 base = self.index[rev][3]
1525 base = self.index[rev][3]
1526 if base == rev:
1526 if base == rev:
1527 return nullrev
1527 return nullrev
1528 elif self._generaldelta:
1528 elif self._generaldelta:
1529 return base
1529 return base
1530 else:
1530 else:
1531 return rev - 1
1531 return rev - 1
1532
1532
1533 def issnapshot(self, rev):
1533 def issnapshot(self, rev):
1534 """tells whether rev is a snapshot
1534 """tells whether rev is a snapshot
1535 """
1535 """
1536 if rev == nullrev:
1536 if rev == nullrev:
1537 return True
1537 return True
1538 deltap = self.deltaparent(rev)
1538 entry = self.index[rev]
1539 if deltap == nullrev:
1539 base = entry[3]
1540 if base == rev:
1540 return True
1541 return True
1541 elif not self._sparserevlog:
1542 elif not self._sparserevlog:
1542 return False
1543 return False
1543 p1, p2 = self.parentrevs(rev)
1544 if base == nullrev:
1544 if deltap in (p1, p2):
1545 return True
1546 p1 = entry[5]
1547 p2 = entry[6]
1548 if base == p1 or base == p2:
1545 return False
1549 return False
1546 return self.issnapshot(deltap)
1550 return self.issnapshot(base)
1547
1551
1548 def snapshotdepth(self, rev):
1552 def snapshotdepth(self, rev):
1549 """number of snapshot in the chain before this one"""
1553 """number of snapshot in the chain before this one"""
1550 if not self.issnapshot(rev):
1554 if not self.issnapshot(rev):
1551 raise error.ProgrammingError('revision %d not a snapshot')
1555 raise error.ProgrammingError('revision %d not a snapshot')
1552 return len(self._deltachain(rev)[0]) - 1
1556 return len(self._deltachain(rev)[0]) - 1
1553
1557
1554 def revdiff(self, rev1, rev2):
1558 def revdiff(self, rev1, rev2):
1555 """return or calculate a delta between two revisions
1559 """return or calculate a delta between two revisions
1556
1560
1557 The delta calculated is in binary form and is intended to be written to
1561 The delta calculated is in binary form and is intended to be written to
1558 revlog data directly. So this function needs raw revision data.
1562 revlog data directly. So this function needs raw revision data.
1559 """
1563 """
1560 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1564 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1561 return bytes(self._chunk(rev2))
1565 return bytes(self._chunk(rev2))
1562
1566
1563 return mdiff.textdiff(self.revision(rev1, raw=True),
1567 return mdiff.textdiff(self.revision(rev1, raw=True),
1564 self.revision(rev2, raw=True))
1568 self.revision(rev2, raw=True))
1565
1569
1566 def revision(self, nodeorrev, _df=None, raw=False):
1570 def revision(self, nodeorrev, _df=None, raw=False):
1567 """return an uncompressed revision of a given node or revision
1571 """return an uncompressed revision of a given node or revision
1568 number.
1572 number.
1569
1573
1570 _df - an existing file handle to read from. (internal-only)
1574 _df - an existing file handle to read from. (internal-only)
1571 raw - an optional argument specifying if the revision data is to be
1575 raw - an optional argument specifying if the revision data is to be
1572 treated as raw data when applying flag transforms. 'raw' should be set
1576 treated as raw data when applying flag transforms. 'raw' should be set
1573 to True when generating changegroups or in debug commands.
1577 to True when generating changegroups or in debug commands.
1574 """
1578 """
1575 if isinstance(nodeorrev, int):
1579 if isinstance(nodeorrev, int):
1576 rev = nodeorrev
1580 rev = nodeorrev
1577 node = self.node(rev)
1581 node = self.node(rev)
1578 else:
1582 else:
1579 node = nodeorrev
1583 node = nodeorrev
1580 rev = None
1584 rev = None
1581
1585
1582 cachedrev = None
1586 cachedrev = None
1583 flags = None
1587 flags = None
1584 rawtext = None
1588 rawtext = None
1585 if node == nullid:
1589 if node == nullid:
1586 return ""
1590 return ""
1587 if self._revisioncache:
1591 if self._revisioncache:
1588 if self._revisioncache[0] == node:
1592 if self._revisioncache[0] == node:
1589 # _cache only stores rawtext
1593 # _cache only stores rawtext
1590 if raw:
1594 if raw:
1591 return self._revisioncache[2]
1595 return self._revisioncache[2]
1592 # duplicated, but good for perf
1596 # duplicated, but good for perf
1593 if rev is None:
1597 if rev is None:
1594 rev = self.rev(node)
1598 rev = self.rev(node)
1595 if flags is None:
1599 if flags is None:
1596 flags = self.flags(rev)
1600 flags = self.flags(rev)
1597 # no extra flags set, no flag processor runs, text = rawtext
1601 # no extra flags set, no flag processor runs, text = rawtext
1598 if flags == REVIDX_DEFAULT_FLAGS:
1602 if flags == REVIDX_DEFAULT_FLAGS:
1599 return self._revisioncache[2]
1603 return self._revisioncache[2]
1600 # rawtext is reusable. need to run flag processor
1604 # rawtext is reusable. need to run flag processor
1601 rawtext = self._revisioncache[2]
1605 rawtext = self._revisioncache[2]
1602
1606
1603 cachedrev = self._revisioncache[1]
1607 cachedrev = self._revisioncache[1]
1604
1608
1605 # look up what we need to read
1609 # look up what we need to read
1606 if rawtext is None:
1610 if rawtext is None:
1607 if rev is None:
1611 if rev is None:
1608 rev = self.rev(node)
1612 rev = self.rev(node)
1609
1613
1610 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1614 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1611 if stopped:
1615 if stopped:
1612 rawtext = self._revisioncache[2]
1616 rawtext = self._revisioncache[2]
1613
1617
1614 # drop cache to save memory
1618 # drop cache to save memory
1615 self._revisioncache = None
1619 self._revisioncache = None
1616
1620
1617 targetsize = None
1621 targetsize = None
1618 rawsize = self.index[rev][2]
1622 rawsize = self.index[rev][2]
1619 if 0 <= rawsize:
1623 if 0 <= rawsize:
1620 targetsize = 4 * rawsize
1624 targetsize = 4 * rawsize
1621
1625
1622 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1626 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1623 if rawtext is None:
1627 if rawtext is None:
1624 rawtext = bytes(bins[0])
1628 rawtext = bytes(bins[0])
1625 bins = bins[1:]
1629 bins = bins[1:]
1626
1630
1627 rawtext = mdiff.patches(rawtext, bins)
1631 rawtext = mdiff.patches(rawtext, bins)
1628 self._revisioncache = (node, rev, rawtext)
1632 self._revisioncache = (node, rev, rawtext)
1629
1633
1630 if flags is None:
1634 if flags is None:
1631 if rev is None:
1635 if rev is None:
1632 rev = self.rev(node)
1636 rev = self.rev(node)
1633 flags = self.flags(rev)
1637 flags = self.flags(rev)
1634
1638
1635 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1639 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1636 if validatehash:
1640 if validatehash:
1637 self.checkhash(text, node, rev=rev)
1641 self.checkhash(text, node, rev=rev)
1638
1642
1639 return text
1643 return text
1640
1644
1641 def hash(self, text, p1, p2):
1645 def hash(self, text, p1, p2):
1642 """Compute a node hash.
1646 """Compute a node hash.
1643
1647
1644 Available as a function so that subclasses can replace the hash
1648 Available as a function so that subclasses can replace the hash
1645 as needed.
1649 as needed.
1646 """
1650 """
1647 return storageutil.hashrevisionsha1(text, p1, p2)
1651 return storageutil.hashrevisionsha1(text, p1, p2)
1648
1652
1649 def _processflags(self, text, flags, operation, raw=False):
1653 def _processflags(self, text, flags, operation, raw=False):
1650 """Inspect revision data flags and applies transforms defined by
1654 """Inspect revision data flags and applies transforms defined by
1651 registered flag processors.
1655 registered flag processors.
1652
1656
1653 ``text`` - the revision data to process
1657 ``text`` - the revision data to process
1654 ``flags`` - the revision flags
1658 ``flags`` - the revision flags
1655 ``operation`` - the operation being performed (read or write)
1659 ``operation`` - the operation being performed (read or write)
1656 ``raw`` - an optional argument describing if the raw transform should be
1660 ``raw`` - an optional argument describing if the raw transform should be
1657 applied.
1661 applied.
1658
1662
1659 This method processes the flags in the order (or reverse order if
1663 This method processes the flags in the order (or reverse order if
1660 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1664 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1661 flag processors registered for present flags. The order of flags defined
1665 flag processors registered for present flags. The order of flags defined
1662 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1666 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1663
1667
1664 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1668 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1665 processed text and ``validatehash`` is a bool indicating whether the
1669 processed text and ``validatehash`` is a bool indicating whether the
1666 returned text should be checked for hash integrity.
1670 returned text should be checked for hash integrity.
1667
1671
1668 Note: If the ``raw`` argument is set, it has precedence over the
1672 Note: If the ``raw`` argument is set, it has precedence over the
1669 operation and will only update the value of ``validatehash``.
1673 operation and will only update the value of ``validatehash``.
1670 """
1674 """
1671 # fast path: no flag processors will run
1675 # fast path: no flag processors will run
1672 if flags == 0:
1676 if flags == 0:
1673 return text, True
1677 return text, True
1674 if not operation in ('read', 'write'):
1678 if not operation in ('read', 'write'):
1675 raise error.ProgrammingError(_("invalid '%s' operation") %
1679 raise error.ProgrammingError(_("invalid '%s' operation") %
1676 operation)
1680 operation)
1677 # Check all flags are known.
1681 # Check all flags are known.
1678 if flags & ~REVIDX_KNOWN_FLAGS:
1682 if flags & ~REVIDX_KNOWN_FLAGS:
1679 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1683 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1680 (flags & ~REVIDX_KNOWN_FLAGS))
1684 (flags & ~REVIDX_KNOWN_FLAGS))
1681 validatehash = True
1685 validatehash = True
1682 # Depending on the operation (read or write), the order might be
1686 # Depending on the operation (read or write), the order might be
1683 # reversed due to non-commutative transforms.
1687 # reversed due to non-commutative transforms.
1684 orderedflags = REVIDX_FLAGS_ORDER
1688 orderedflags = REVIDX_FLAGS_ORDER
1685 if operation == 'write':
1689 if operation == 'write':
1686 orderedflags = reversed(orderedflags)
1690 orderedflags = reversed(orderedflags)
1687
1691
1688 for flag in orderedflags:
1692 for flag in orderedflags:
1689 # If a flagprocessor has been registered for a known flag, apply the
1693 # If a flagprocessor has been registered for a known flag, apply the
1690 # related operation transform and update result tuple.
1694 # related operation transform and update result tuple.
1691 if flag & flags:
1695 if flag & flags:
1692 vhash = True
1696 vhash = True
1693
1697
1694 if flag not in self._flagprocessors:
1698 if flag not in self._flagprocessors:
1695 message = _("missing processor for flag '%#x'") % (flag)
1699 message = _("missing processor for flag '%#x'") % (flag)
1696 raise error.RevlogError(message)
1700 raise error.RevlogError(message)
1697
1701
1698 processor = self._flagprocessors[flag]
1702 processor = self._flagprocessors[flag]
1699 if processor is not None:
1703 if processor is not None:
1700 readtransform, writetransform, rawtransform = processor
1704 readtransform, writetransform, rawtransform = processor
1701
1705
1702 if raw:
1706 if raw:
1703 vhash = rawtransform(self, text)
1707 vhash = rawtransform(self, text)
1704 elif operation == 'read':
1708 elif operation == 'read':
1705 text, vhash = readtransform(self, text)
1709 text, vhash = readtransform(self, text)
1706 else: # write operation
1710 else: # write operation
1707 text, vhash = writetransform(self, text)
1711 text, vhash = writetransform(self, text)
1708 validatehash = validatehash and vhash
1712 validatehash = validatehash and vhash
1709
1713
1710 return text, validatehash
1714 return text, validatehash
1711
1715
1712 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1716 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1713 """Check node hash integrity.
1717 """Check node hash integrity.
1714
1718
1715 Available as a function so that subclasses can extend hash mismatch
1719 Available as a function so that subclasses can extend hash mismatch
1716 behaviors as needed.
1720 behaviors as needed.
1717 """
1721 """
1718 try:
1722 try:
1719 if p1 is None and p2 is None:
1723 if p1 is None and p2 is None:
1720 p1, p2 = self.parents(node)
1724 p1, p2 = self.parents(node)
1721 if node != self.hash(text, p1, p2):
1725 if node != self.hash(text, p1, p2):
1722 # Clear the revision cache on hash failure. The revision cache
1726 # Clear the revision cache on hash failure. The revision cache
1723 # only stores the raw revision and clearing the cache does have
1727 # only stores the raw revision and clearing the cache does have
1724 # the side-effect that we won't have a cache hit when the raw
1728 # the side-effect that we won't have a cache hit when the raw
1725 # revision data is accessed. But this case should be rare and
1729 # revision data is accessed. But this case should be rare and
1726 # it is extra work to teach the cache about the hash
1730 # it is extra work to teach the cache about the hash
1727 # verification state.
1731 # verification state.
1728 if self._revisioncache and self._revisioncache[0] == node:
1732 if self._revisioncache and self._revisioncache[0] == node:
1729 self._revisioncache = None
1733 self._revisioncache = None
1730
1734
1731 revornode = rev
1735 revornode = rev
1732 if revornode is None:
1736 if revornode is None:
1733 revornode = templatefilters.short(hex(node))
1737 revornode = templatefilters.short(hex(node))
1734 raise error.RevlogError(_("integrity check failed on %s:%s")
1738 raise error.RevlogError(_("integrity check failed on %s:%s")
1735 % (self.indexfile, pycompat.bytestr(revornode)))
1739 % (self.indexfile, pycompat.bytestr(revornode)))
1736 except error.RevlogError:
1740 except error.RevlogError:
1737 if self._censorable and storageutil.iscensoredtext(text):
1741 if self._censorable and storageutil.iscensoredtext(text):
1738 raise error.CensoredNodeError(self.indexfile, node, text)
1742 raise error.CensoredNodeError(self.indexfile, node, text)
1739 raise
1743 raise
1740
1744
1741 def _enforceinlinesize(self, tr, fp=None):
1745 def _enforceinlinesize(self, tr, fp=None):
1742 """Check if the revlog is too big for inline and convert if so.
1746 """Check if the revlog is too big for inline and convert if so.
1743
1747
1744 This should be called after revisions are added to the revlog. If the
1748 This should be called after revisions are added to the revlog. If the
1745 revlog has grown too large to be an inline revlog, it will convert it
1749 revlog has grown too large to be an inline revlog, it will convert it
1746 to use multiple index and data files.
1750 to use multiple index and data files.
1747 """
1751 """
1748 tiprev = len(self) - 1
1752 tiprev = len(self) - 1
1749 if (not self._inline or
1753 if (not self._inline or
1750 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1754 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1751 return
1755 return
1752
1756
1753 trinfo = tr.find(self.indexfile)
1757 trinfo = tr.find(self.indexfile)
1754 if trinfo is None:
1758 if trinfo is None:
1755 raise error.RevlogError(_("%s not found in the transaction")
1759 raise error.RevlogError(_("%s not found in the transaction")
1756 % self.indexfile)
1760 % self.indexfile)
1757
1761
1758 trindex = trinfo[2]
1762 trindex = trinfo[2]
1759 if trindex is not None:
1763 if trindex is not None:
1760 dataoff = self.start(trindex)
1764 dataoff = self.start(trindex)
1761 else:
1765 else:
1762 # revlog was stripped at start of transaction, use all leftover data
1766 # revlog was stripped at start of transaction, use all leftover data
1763 trindex = len(self) - 1
1767 trindex = len(self) - 1
1764 dataoff = self.end(tiprev)
1768 dataoff = self.end(tiprev)
1765
1769
1766 tr.add(self.datafile, dataoff)
1770 tr.add(self.datafile, dataoff)
1767
1771
1768 if fp:
1772 if fp:
1769 fp.flush()
1773 fp.flush()
1770 fp.close()
1774 fp.close()
1771 # We can't use the cached file handle after close(). So prevent
1775 # We can't use the cached file handle after close(). So prevent
1772 # its usage.
1776 # its usage.
1773 self._writinghandles = None
1777 self._writinghandles = None
1774
1778
1775 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1779 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1776 for r in self:
1780 for r in self:
1777 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1781 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1778
1782
1779 with self._indexfp('w') as fp:
1783 with self._indexfp('w') as fp:
1780 self.version &= ~FLAG_INLINE_DATA
1784 self.version &= ~FLAG_INLINE_DATA
1781 self._inline = False
1785 self._inline = False
1782 io = self._io
1786 io = self._io
1783 for i in self:
1787 for i in self:
1784 e = io.packentry(self.index[i], self.node, self.version, i)
1788 e = io.packentry(self.index[i], self.node, self.version, i)
1785 fp.write(e)
1789 fp.write(e)
1786
1790
1787 # the temp file replace the real index when we exit the context
1791 # the temp file replace the real index when we exit the context
1788 # manager
1792 # manager
1789
1793
1790 tr.replace(self.indexfile, trindex * self._io.size)
1794 tr.replace(self.indexfile, trindex * self._io.size)
1791 self._chunkclear()
1795 self._chunkclear()
1792
1796
1793 def _nodeduplicatecallback(self, transaction, node):
1797 def _nodeduplicatecallback(self, transaction, node):
1794 """called when trying to add a node already stored.
1798 """called when trying to add a node already stored.
1795 """
1799 """
1796
1800
1797 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1801 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1798 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1802 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1799 """add a revision to the log
1803 """add a revision to the log
1800
1804
1801 text - the revision data to add
1805 text - the revision data to add
1802 transaction - the transaction object used for rollback
1806 transaction - the transaction object used for rollback
1803 link - the linkrev data to add
1807 link - the linkrev data to add
1804 p1, p2 - the parent nodeids of the revision
1808 p1, p2 - the parent nodeids of the revision
1805 cachedelta - an optional precomputed delta
1809 cachedelta - an optional precomputed delta
1806 node - nodeid of revision; typically node is not specified, and it is
1810 node - nodeid of revision; typically node is not specified, and it is
1807 computed by default as hash(text, p1, p2), however subclasses might
1811 computed by default as hash(text, p1, p2), however subclasses might
1808 use different hashing method (and override checkhash() in such case)
1812 use different hashing method (and override checkhash() in such case)
1809 flags - the known flags to set on the revision
1813 flags - the known flags to set on the revision
1810 deltacomputer - an optional deltacomputer instance shared between
1814 deltacomputer - an optional deltacomputer instance shared between
1811 multiple calls
1815 multiple calls
1812 """
1816 """
1813 if link == nullrev:
1817 if link == nullrev:
1814 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1818 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1815 % self.indexfile)
1819 % self.indexfile)
1816
1820
1817 if flags:
1821 if flags:
1818 node = node or self.hash(text, p1, p2)
1822 node = node or self.hash(text, p1, p2)
1819
1823
1820 rawtext, validatehash = self._processflags(text, flags, 'write')
1824 rawtext, validatehash = self._processflags(text, flags, 'write')
1821
1825
1822 # If the flag processor modifies the revision data, ignore any provided
1826 # If the flag processor modifies the revision data, ignore any provided
1823 # cachedelta.
1827 # cachedelta.
1824 if rawtext != text:
1828 if rawtext != text:
1825 cachedelta = None
1829 cachedelta = None
1826
1830
1827 if len(rawtext) > _maxentrysize:
1831 if len(rawtext) > _maxentrysize:
1828 raise error.RevlogError(
1832 raise error.RevlogError(
1829 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1833 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1830 % (self.indexfile, len(rawtext)))
1834 % (self.indexfile, len(rawtext)))
1831
1835
1832 node = node or self.hash(rawtext, p1, p2)
1836 node = node or self.hash(rawtext, p1, p2)
1833 if node in self.nodemap:
1837 if node in self.nodemap:
1834 return node
1838 return node
1835
1839
1836 if validatehash:
1840 if validatehash:
1837 self.checkhash(rawtext, node, p1=p1, p2=p2)
1841 self.checkhash(rawtext, node, p1=p1, p2=p2)
1838
1842
1839 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1843 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1840 flags, cachedelta=cachedelta,
1844 flags, cachedelta=cachedelta,
1841 deltacomputer=deltacomputer)
1845 deltacomputer=deltacomputer)
1842
1846
1843 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1847 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1844 cachedelta=None, deltacomputer=None):
1848 cachedelta=None, deltacomputer=None):
1845 """add a raw revision with known flags, node and parents
1849 """add a raw revision with known flags, node and parents
1846 useful when reusing a revision not stored in this revlog (ex: received
1850 useful when reusing a revision not stored in this revlog (ex: received
1847 over wire, or read from an external bundle).
1851 over wire, or read from an external bundle).
1848 """
1852 """
1849 dfh = None
1853 dfh = None
1850 if not self._inline:
1854 if not self._inline:
1851 dfh = self._datafp("a+")
1855 dfh = self._datafp("a+")
1852 ifh = self._indexfp("a+")
1856 ifh = self._indexfp("a+")
1853 try:
1857 try:
1854 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1858 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1855 flags, cachedelta, ifh, dfh,
1859 flags, cachedelta, ifh, dfh,
1856 deltacomputer=deltacomputer)
1860 deltacomputer=deltacomputer)
1857 finally:
1861 finally:
1858 if dfh:
1862 if dfh:
1859 dfh.close()
1863 dfh.close()
1860 ifh.close()
1864 ifh.close()
1861
1865
1862 def compress(self, data):
1866 def compress(self, data):
1863 """Generate a possibly-compressed representation of data."""
1867 """Generate a possibly-compressed representation of data."""
1864 if not data:
1868 if not data:
1865 return '', data
1869 return '', data
1866
1870
1867 compressed = self._compressor.compress(data)
1871 compressed = self._compressor.compress(data)
1868
1872
1869 if compressed:
1873 if compressed:
1870 # The revlog compressor added the header in the returned data.
1874 # The revlog compressor added the header in the returned data.
1871 return '', compressed
1875 return '', compressed
1872
1876
1873 if data[0:1] == '\0':
1877 if data[0:1] == '\0':
1874 return '', data
1878 return '', data
1875 return 'u', data
1879 return 'u', data
1876
1880
1877 def decompress(self, data):
1881 def decompress(self, data):
1878 """Decompress a revlog chunk.
1882 """Decompress a revlog chunk.
1879
1883
1880 The chunk is expected to begin with a header identifying the
1884 The chunk is expected to begin with a header identifying the
1881 format type so it can be routed to an appropriate decompressor.
1885 format type so it can be routed to an appropriate decompressor.
1882 """
1886 """
1883 if not data:
1887 if not data:
1884 return data
1888 return data
1885
1889
1886 # Revlogs are read much more frequently than they are written and many
1890 # Revlogs are read much more frequently than they are written and many
1887 # chunks only take microseconds to decompress, so performance is
1891 # chunks only take microseconds to decompress, so performance is
1888 # important here.
1892 # important here.
1889 #
1893 #
1890 # We can make a few assumptions about revlogs:
1894 # We can make a few assumptions about revlogs:
1891 #
1895 #
1892 # 1) the majority of chunks will be compressed (as opposed to inline
1896 # 1) the majority of chunks will be compressed (as opposed to inline
1893 # raw data).
1897 # raw data).
1894 # 2) decompressing *any* data will likely by at least 10x slower than
1898 # 2) decompressing *any* data will likely by at least 10x slower than
1895 # returning raw inline data.
1899 # returning raw inline data.
1896 # 3) we want to prioritize common and officially supported compression
1900 # 3) we want to prioritize common and officially supported compression
1897 # engines
1901 # engines
1898 #
1902 #
1899 # It follows that we want to optimize for "decompress compressed data
1903 # It follows that we want to optimize for "decompress compressed data
1900 # when encoded with common and officially supported compression engines"
1904 # when encoded with common and officially supported compression engines"
1901 # case over "raw data" and "data encoded by less common or non-official
1905 # case over "raw data" and "data encoded by less common or non-official
1902 # compression engines." That is why we have the inline lookup first
1906 # compression engines." That is why we have the inline lookup first
1903 # followed by the compengines lookup.
1907 # followed by the compengines lookup.
1904 #
1908 #
1905 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1909 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1906 # compressed chunks. And this matters for changelog and manifest reads.
1910 # compressed chunks. And this matters for changelog and manifest reads.
1907 t = data[0:1]
1911 t = data[0:1]
1908
1912
1909 if t == 'x':
1913 if t == 'x':
1910 try:
1914 try:
1911 return _zlibdecompress(data)
1915 return _zlibdecompress(data)
1912 except zlib.error as e:
1916 except zlib.error as e:
1913 raise error.RevlogError(_('revlog decompress error: %s') %
1917 raise error.RevlogError(_('revlog decompress error: %s') %
1914 stringutil.forcebytestr(e))
1918 stringutil.forcebytestr(e))
1915 # '\0' is more common than 'u' so it goes first.
1919 # '\0' is more common than 'u' so it goes first.
1916 elif t == '\0':
1920 elif t == '\0':
1917 return data
1921 return data
1918 elif t == 'u':
1922 elif t == 'u':
1919 return util.buffer(data, 1)
1923 return util.buffer(data, 1)
1920
1924
1921 try:
1925 try:
1922 compressor = self._decompressors[t]
1926 compressor = self._decompressors[t]
1923 except KeyError:
1927 except KeyError:
1924 try:
1928 try:
1925 engine = util.compengines.forrevlogheader(t)
1929 engine = util.compengines.forrevlogheader(t)
1926 compressor = engine.revlogcompressor()
1930 compressor = engine.revlogcompressor()
1927 self._decompressors[t] = compressor
1931 self._decompressors[t] = compressor
1928 except KeyError:
1932 except KeyError:
1929 raise error.RevlogError(_('unknown compression type %r') % t)
1933 raise error.RevlogError(_('unknown compression type %r') % t)
1930
1934
1931 return compressor.decompress(data)
1935 return compressor.decompress(data)
1932
1936
1933 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1937 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1934 cachedelta, ifh, dfh, alwayscache=False,
1938 cachedelta, ifh, dfh, alwayscache=False,
1935 deltacomputer=None):
1939 deltacomputer=None):
1936 """internal function to add revisions to the log
1940 """internal function to add revisions to the log
1937
1941
1938 see addrevision for argument descriptions.
1942 see addrevision for argument descriptions.
1939
1943
1940 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1944 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1941
1945
1942 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1946 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1943 be used.
1947 be used.
1944
1948
1945 invariants:
1949 invariants:
1946 - rawtext is optional (can be None); if not set, cachedelta must be set.
1950 - rawtext is optional (can be None); if not set, cachedelta must be set.
1947 if both are set, they must correspond to each other.
1951 if both are set, they must correspond to each other.
1948 """
1952 """
1949 if node == nullid:
1953 if node == nullid:
1950 raise error.RevlogError(_("%s: attempt to add null revision") %
1954 raise error.RevlogError(_("%s: attempt to add null revision") %
1951 self.indexfile)
1955 self.indexfile)
1952 if node == wdirid or node in wdirfilenodeids:
1956 if node == wdirid or node in wdirfilenodeids:
1953 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1957 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1954 self.indexfile)
1958 self.indexfile)
1955
1959
1956 if self._inline:
1960 if self._inline:
1957 fh = ifh
1961 fh = ifh
1958 else:
1962 else:
1959 fh = dfh
1963 fh = dfh
1960
1964
1961 btext = [rawtext]
1965 btext = [rawtext]
1962
1966
1963 curr = len(self)
1967 curr = len(self)
1964 prev = curr - 1
1968 prev = curr - 1
1965 offset = self.end(prev)
1969 offset = self.end(prev)
1966 p1r, p2r = self.rev(p1), self.rev(p2)
1970 p1r, p2r = self.rev(p1), self.rev(p2)
1967
1971
1968 # full versions are inserted when the needed deltas
1972 # full versions are inserted when the needed deltas
1969 # become comparable to the uncompressed text
1973 # become comparable to the uncompressed text
1970 if rawtext is None:
1974 if rawtext is None:
1971 # need rawtext size, before changed by flag processors, which is
1975 # need rawtext size, before changed by flag processors, which is
1972 # the non-raw size. use revlog explicitly to avoid filelog's extra
1976 # the non-raw size. use revlog explicitly to avoid filelog's extra
1973 # logic that might remove metadata size.
1977 # logic that might remove metadata size.
1974 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1978 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1975 cachedelta[1])
1979 cachedelta[1])
1976 else:
1980 else:
1977 textlen = len(rawtext)
1981 textlen = len(rawtext)
1978
1982
1979 if deltacomputer is None:
1983 if deltacomputer is None:
1980 deltacomputer = deltautil.deltacomputer(self)
1984 deltacomputer = deltautil.deltacomputer(self)
1981
1985
1982 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1986 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1983
1987
1984 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1988 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1985
1989
1986 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1990 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1987 deltainfo.base, link, p1r, p2r, node)
1991 deltainfo.base, link, p1r, p2r, node)
1988 self.index.append(e)
1992 self.index.append(e)
1989 self.nodemap[node] = curr
1993 self.nodemap[node] = curr
1990
1994
1991 # Reset the pure node cache start lookup offset to account for new
1995 # Reset the pure node cache start lookup offset to account for new
1992 # revision.
1996 # revision.
1993 if self._nodepos is not None:
1997 if self._nodepos is not None:
1994 self._nodepos = curr
1998 self._nodepos = curr
1995
1999
1996 entry = self._io.packentry(e, self.node, self.version, curr)
2000 entry = self._io.packentry(e, self.node, self.version, curr)
1997 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2001 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1998 link, offset)
2002 link, offset)
1999
2003
2000 rawtext = btext[0]
2004 rawtext = btext[0]
2001
2005
2002 if alwayscache and rawtext is None:
2006 if alwayscache and rawtext is None:
2003 rawtext = deltacomputer.buildtext(revinfo, fh)
2007 rawtext = deltacomputer.buildtext(revinfo, fh)
2004
2008
2005 if type(rawtext) == bytes: # only accept immutable objects
2009 if type(rawtext) == bytes: # only accept immutable objects
2006 self._revisioncache = (node, curr, rawtext)
2010 self._revisioncache = (node, curr, rawtext)
2007 self._chainbasecache[curr] = deltainfo.chainbase
2011 self._chainbasecache[curr] = deltainfo.chainbase
2008 return node
2012 return node
2009
2013
def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
    """Write one index entry (and its data chunk) to the underlying files.

    ``ifh``/``dfh`` are the open index and data file handles (``dfh`` is
    None for inline revlogs).  ``entry`` is the packed index record and
    ``data`` a (header, body) pair of compressed chunk pieces.
    """
    # Files opened in a+ mode behave inconsistently across platforms.
    # Windows requires an explicit positioning call when a handle switches
    # between reads and writes (see 3686fa2b8eee and mixedfilemodewrapper
    # in windows.py).  Elsewhere, Python or the OS itself can be buggy:
    # some Solaris versions fail to append at EOF if the file was seeked
    # earlier (issue4943).
    #
    # The workaround is to seek to EOF before every write.  This is likely
    # unnecessary on Python 3, but the handle is reused for reads and may
    # have been seeked there, so be careful before removing it.
    ifh.seek(0, os.SEEK_END)
    if dfh:
        dfh.seek(0, os.SEEK_END)

    curr = len(self) - 1
    if self._inline:
        # Inline revlog: the entry and the data chunk both go into the
        # index file, back to back.
        offset += curr * self._io.size
        transaction.add(self.indexfile, offset, curr)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        # May migrate the revlog out of inline mode when it grows too big.
        self._enforceinlinesize(transaction, ifh)
    else:
        # Split revlog: data chunk to the data file, entry to the index.
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        ifh.write(entry)
2042
2046
def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
    """Add a group of deltas to the revision log.

    The first delta in ``deltas`` is against a parent that must already be
    stored; later ones are against previously added revisions.  Returns the
    list of nodes seen (including pre-existing duplicates).

    If ``addrevisioncb`` is provided it is invoked as ``cb(self, node)``
    after each newly stored revision.
    """
    if self._writinghandles:
        raise error.ProgrammingError('cannot nest addgroup() calls')

    nodes = []

    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self._indexfp("a+")
    isize = r * self._io.size
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self._datafp("a+")

    def flush():
        # flush pending writes so _peek_iscensored can read them back
        if dfh:
            dfh.flush()
        ifh.flush()

    self._writinghandles = (ifh, dfh)

    try:
        deltacomputer = deltautil.deltacomputer(self)
        # walk the incoming delta stream
        for data in deltas:
            node, p1, p2, linknode, deltabase, delta, flags = data
            link = linkmapper(linknode)
            flags = flags or REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self.nodemap:
                # duplicate: two branches made the same change
                self._nodeduplicatecallback(transaction, node)
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise error.LookupError(p, self.indexfile,
                                            _('unknown parent'))

            if deltabase not in self.nodemap:
                raise error.LookupError(deltabase, self.indexfile,
                                        _('unknown delta base'))

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # A delta against a censored base must be a full
                # replacement done in a single patch operation.
                hlen = struct.calcsize(">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self.indexfile,
                                                  self.node(baserev))

            if not flags and self._peek_iscensored(baserev, delta, flush):
                flags |= REVIDX_ISCENSORED

            # Consumers of addrevisioncb will typically want to read the
            # revision right back, which means a revision() call; caching
            # here lets that call hit the fast path.  addgroup() is only
            # used for changegroup application, so the data can always be
            # treated as raw by the flag processor.
            self._addrevision(node, None, transaction, link,
                              p1, p2, flags, (baserev, delta),
                              ifh, dfh,
                              alwayscache=bool(addrevisioncb),
                              deltacomputer=deltacomputer)

            if addrevisioncb:
                addrevisioncb(self, node)

            if not dfh and not self._inline:
                # _addrevision migrated the revlog from inline to split
                # storage: reopen both handles.
                ifh.close()
                dfh = self._datafp("a+")
                ifh = self._indexfp("a+")
                self._writinghandles = (ifh, dfh)
    finally:
        self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

    return nodes
2150
2154
def iscensored(self, rev):
    """Check if a file revision is censored."""
    # Censoring only applies to revlogs created with censorable=True
    # (filelogs); everything else is trivially uncensored.
    if not self._censorable:
        return False

    return self.flags(rev) & REVIDX_ISCENSORED
2157
2161
2158 def _peek_iscensored(self, baserev, delta, flush):
2162 def _peek_iscensored(self, baserev, delta, flush):
2159 """Quickly check if a delta produces a censored revision."""
2163 """Quickly check if a delta produces a censored revision."""
2160 if not self._censorable:
2164 if not self._censorable:
2161 return False
2165 return False
2162
2166
2163 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2167 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2164
2168
def getstrippoint(self, minlink):
    """Find the minimum rev that must be stripped to strip the linkrev.

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    # All of the actual work is shared with other storage backends.
    return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                        self.headrevs(),
                                        self.linkrev, self.parentrevs)
2174
2178
def strip(self, minlink, transaction):
    """Truncate the revlog on the first revision with a linkrev >= minlink.

    Called while stripping revision ``minlink`` and its descendants.  All
    revisions with linkrev >= minlink must go, because the corresponding
    changelog revisions will be renumbered after the strip.

    We truncate at the first such revision and trust the caller to have
    saved (and to re-add) any revisions that should survive.
    """
    if len(self) == 0:
        return

    rev, _ignored = self.getstrippoint(minlink)
    if rev == len(self):
        # nothing to strip
        return

    # First truncate the files on disk.
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        end = rev * self._io.size
    else:
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # Then drop the in-memory state covering the stripped revisions.
    self._revisioncache = None
    self._chaininfocache = {}
    self._chunkclear()
    for x in pycompat.xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    del self.index[rev:-1]
    self._nodepos = None
2215
2219
def checksize(self):
    """Compare on-disk sizes against what the index implies.

    Returns ``(dd, di)``: the number of surplus bytes in the data file and
    in the index file respectively (0 when consistent or missing).
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    # Data file: compare its real size with the expected end offset.
    try:
        with self._datafp() as f:
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        # a missing data file is fine (inline or empty revlog)
        dd = 0

    # Index file: any remainder after whole entries is surplus.
    try:
        f = self.opener(self.indexfile)
        f.seek(0, 2)
        actual = f.tell()
        f.close()
        s = self._io.size
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # Inline revlogs interleave data with entries, so account for
            # the stored chunk lengths as well.
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2251
2255
def files(self):
    """Return the list of files backing this revlog.

    Always includes the index file; the data file is only listed for
    non-inline revlogs.
    """
    res = [self.indexfile]
    if not self._inline:
        res.append(self.datafile)
    return res
2257
2261
def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                  assumehaveparentrevisions=False,
                  deltamode=repository.CG_DELTAMODE_STD):
    """Emit revision data for ``nodes``, delegating to storageutil.

    Validates/normalizes ``nodesorder`` and ``deltamode`` before handing
    off; see ``storageutil.emitrevisions`` for the emission semantics.
    """
    if nodesorder not in ('nodes', 'storage', 'linear', None):
        raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                     nodesorder)

    # Without generaldelta, storage order is the only sensible default.
    if nodesorder is None and not self._generaldelta:
        nodesorder = 'storage'

    # Revlogs that don't store delta chains can only emit full texts,
    # except when deltas against the previous revision were requested.
    if (not self._storedeltachains and
        deltamode != repository.CG_DELTAMODE_PREV):
        deltamode = repository.CG_DELTAMODE_FULL

    return storageutil.emitrevisions(
        self, nodes, nodesorder, revlogrevisiondelta,
        deltaparentfn=self.deltaparent,
        candeltafn=self.candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
        deltamode=deltamode,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions)
2282
2286
# Delta-reuse policies accepted by clone() -- see its docstring for the
# exact semantics of each mode.
DELTAREUSEALWAYS = 'always'
DELTAREUSESAMEREVS = 'samerevs'
DELTAREUSENEVER = 'never'

DELTAREUSEFULLADD = 'fulladd'

# The complete set of valid policy values, for validation.
DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2290
2294
def clone(self, tr, destrevlog, addrevisioncb=None,
          deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
    """Copy this revlog to ``destrevlog``, possibly with format changes.

    The destination will contain the same revisions and nodes, though not
    necessarily bit-for-bit identical storage (delta encoding may differ).

    ``deltareuse`` selects how existing deltas are carried over:

    DELTAREUSEALWAYS
      Reuse deltas whenever possible, even if the destination would pick
      different base revisions.  Fastest.
    DELTAREUSESAMEREVS
      Reuse a delta only if the destination would choose the same base.
      Balanced speed/quality; this is the default.
    DELTAREUSENEVER
      Always recompute deltas (e.g. after a delta algorithm change).
      Slowest.
    DELTAREUSEFULLADD
      Re-add every revision through the full addrevision() path.

    ``forcedeltabothparents`` overrides whether merge deltas are computed
    against both parents; None keeps the destination's current setting.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

    if len(destrevlog):
        raise ValueError(_('destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_('source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_('destination revlog has filtered revisions'))

    # Save destination tunables so they can be restored afterwards;
    # lazydeltabase controls whether a cached delta may be reused.
    oldlazydeltabase = destrevlog._lazydeltabase
    oldamd = destrevlog._deltabothparents

    try:
        if deltareuse == self.DELTAREUSEALWAYS:
            destrevlog._lazydeltabase = True
        elif deltareuse == self.DELTAREUSESAMEREVS:
            destrevlog._lazydeltabase = False

        destrevlog._deltabothparents = forcedeltabothparents or oldamd

        populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                            self.DELTAREUSESAMEREVS)

        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some subclasses override linkrev() to account for filtered
            # revisions, so read the raw index entry directly.
            flags = entry[0] & 0xffff
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) seed the destination with this revlog's delta,
            # when permitted and the stored chunk actually is a delta.
            cachedelta = None
            rawtext = None
            if populatecachedelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            if not cachedelta:
                rawtext = self.revision(rev, raw=True)


            if deltareuse == self.DELTAREUSEFULLADD:
                destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                       cachedelta=cachedelta,
                                       node=node, flags=flags,
                                       deltacomputer=deltacomputer)
            else:
                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                            p2, flags, cachedelta, ifh, dfh,
                                            deltacomputer=deltacomputer)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)
    finally:
        # Restore the destination's original delta settings.
        destrevlog._lazydeltabase = oldlazydeltabase
        destrevlog._deltabothparents = oldamd
2407
2411
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace ``censornode``'s content with a censor tombstone.

    Strategy: rewriting a revlog in place is hard, so build a brand new
    revlog with every revision copied (the censored one replaced by the
    tombstone) and swap the files in on transaction close.
    """
    if (self.version & 0xFFFF) == REVLOGV0:
        raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                self.version)

    censorrev = self.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # The tombstone must fit in the slot of the data it replaces.
    if len(tombstone) > self.rawsize(censorrev):
        raise error.Abort(_('censor tombstone must be no longer than '
                            'censored data'))

    newindexfile = self.indexfile + b'.tmpcensored'
    newdatafile = self.datafile + b'.tmpcensored'

    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog(self.opener, newindexfile, newdatafile,
                   censorable=True)
    newrl.version = self.version
    newrl._generaldelta = self._generaldelta
    newrl._io = self._io

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev == censorrev:
            # Store the tombstone in place of the censored content.
            newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                 p1, p2, censornode, REVIDX_ISCENSORED)

            # A censored revision must be stored as a full snapshot.
            if newrl.deltaparent(rev) != nullrev:
                raise error.Abort(_('censored revision stored as delta; '
                                    'cannot censor'),
                                  hint=_('censoring of revlogs is not '
                                         'fully implemented; please report '
                                         'this bug'))
            continue

        if self.iscensored(rev):
            if self.deltaparent(rev) != nullrev:
                raise error.Abort(_('cannot censor due to censored '
                                    'revision having delta stored'))
            # Copy the stored tombstone verbatim (it can't be "revised").
            rawtext = self._chunk(rev)
        else:
            rawtext = self.revision(rev, raw=True)

        newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                             self.flags(rev))

    # Back up the old files, then atomically swap in the new ones.
    tr.addbackup(self.indexfile, location='store')
    if not self._inline:
        tr.addbackup(self.datafile, location='store')

    self.opener.rename(newrl.indexfile, self.indexfile)
    if not self._inline:
        self.opener.rename(newrl.datafile, self.datafile)

    # In-memory state now describes the old files; rebuild it.
    self.clearcaches()
    self._loadindex(self.version, None)
2471
2475
2472 def verifyintegrity(self, state):
2476 def verifyintegrity(self, state):
2473 """Verifies the integrity of the revlog.
2477 """Verifies the integrity of the revlog.
2474
2478
2475 Yields ``revlogproblem`` instances describing problems that are
2479 Yields ``revlogproblem`` instances describing problems that are
2476 found.
2480 found.
2477 """
2481 """
2478 dd, di = self.checksize()
2482 dd, di = self.checksize()
2479 if dd:
2483 if dd:
2480 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2484 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2481 if di:
2485 if di:
2482 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2486 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2483
2487
2484 version = self.version & 0xFFFF
2488 version = self.version & 0xFFFF
2485
2489
2486 # The verifier tells us what version revlog we should be.
2490 # The verifier tells us what version revlog we should be.
2487 if version != state['expectedversion']:
2491 if version != state['expectedversion']:
2488 yield revlogproblem(
2492 yield revlogproblem(
2489 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2493 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2490 (self.indexfile, version, state['expectedversion']))
2494 (self.indexfile, version, state['expectedversion']))
2491
2495
2492 state['skipread'] = set()
2496 state['skipread'] = set()
2493
2497
2494 for rev in self:
2498 for rev in self:
2495 node = self.node(rev)
2499 node = self.node(rev)
2496
2500
2497 # Verify contents. 4 cases to care about:
2501 # Verify contents. 4 cases to care about:
2498 #
2502 #
2499 # common: the most common case
2503 # common: the most common case
2500 # rename: with a rename
2504 # rename: with a rename
2501 # meta: file content starts with b'\1\n', the metadata
2505 # meta: file content starts with b'\1\n', the metadata
2502 # header defined in filelog.py, but without a rename
2506 # header defined in filelog.py, but without a rename
2503 # ext: content stored externally
2507 # ext: content stored externally
2504 #
2508 #
2505 # More formally, their differences are shown below:
2509 # More formally, their differences are shown below:
2506 #
2510 #
2507 # | common | rename | meta | ext
2511 # | common | rename | meta | ext
2508 # -------------------------------------------------------
2512 # -------------------------------------------------------
2509 # flags() | 0 | 0 | 0 | not 0
2513 # flags() | 0 | 0 | 0 | not 0
2510 # renamed() | False | True | False | ?
2514 # renamed() | False | True | False | ?
2511 # rawtext[0:2]=='\1\n'| False | True | True | ?
2515 # rawtext[0:2]=='\1\n'| False | True | True | ?
2512 #
2516 #
2513 # "rawtext" means the raw text stored in revlog data, which
2517 # "rawtext" means the raw text stored in revlog data, which
2514 # could be retrieved by "revision(rev, raw=True)". "text"
2518 # could be retrieved by "revision(rev, raw=True)". "text"
2515 # mentioned below is "revision(rev, raw=False)".
2519 # mentioned below is "revision(rev, raw=False)".
2516 #
2520 #
2517 # There are 3 different lengths stored physically:
2521 # There are 3 different lengths stored physically:
2518 # 1. L1: rawsize, stored in revlog index
2522 # 1. L1: rawsize, stored in revlog index
2519 # 2. L2: len(rawtext), stored in revlog data
2523 # 2. L2: len(rawtext), stored in revlog data
2520 # 3. L3: len(text), stored in revlog data if flags==0, or
2524 # 3. L3: len(text), stored in revlog data if flags==0, or
2521 # possibly somewhere else if flags!=0
2525 # possibly somewhere else if flags!=0
2522 #
2526 #
2523 # L1 should be equal to L2. L3 could be different from them.
2527 # L1 should be equal to L2. L3 could be different from them.
2524 # "text" may or may not affect commit hash depending on flag
2528 # "text" may or may not affect commit hash depending on flag
2525 # processors (see revlog.addflagprocessor).
2529 # processors (see revlog.addflagprocessor).
2526 #
2530 #
2527 # | common | rename | meta | ext
2531 # | common | rename | meta | ext
2528 # -------------------------------------------------
2532 # -------------------------------------------------
2529 # rawsize() | L1 | L1 | L1 | L1
2533 # rawsize() | L1 | L1 | L1 | L1
2530 # size() | L1 | L2-LM | L1(*) | L1 (?)
2534 # size() | L1 | L2-LM | L1(*) | L1 (?)
2531 # len(rawtext) | L2 | L2 | L2 | L2
2535 # len(rawtext) | L2 | L2 | L2 | L2
2532 # len(text) | L2 | L2 | L2 | L3
2536 # len(text) | L2 | L2 | L2 | L3
2533 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2537 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2534 #
2538 #
2535 # LM: length of metadata, depending on rawtext
2539 # LM: length of metadata, depending on rawtext
2536 # (*): not ideal, see comment in filelog.size
2540 # (*): not ideal, see comment in filelog.size
2537 # (?): could be "- len(meta)" if the resolved content has
2541 # (?): could be "- len(meta)" if the resolved content has
2538 # rename metadata
2542 # rename metadata
2539 #
2543 #
2540 # Checks needed to be done:
2544 # Checks needed to be done:
2541 # 1. length check: L1 == L2, in all cases.
2545 # 1. length check: L1 == L2, in all cases.
2542 # 2. hash check: depending on flag processor, we may need to
2546 # 2. hash check: depending on flag processor, we may need to
2543 # use either "text" (external), or "rawtext" (in revlog).
2547 # use either "text" (external), or "rawtext" (in revlog).
2544
2548
2545 try:
2549 try:
2546 skipflags = state.get('skipflags', 0)
2550 skipflags = state.get('skipflags', 0)
2547 if skipflags:
2551 if skipflags:
2548 skipflags &= self.flags(rev)
2552 skipflags &= self.flags(rev)
2549
2553
2550 if skipflags:
2554 if skipflags:
2551 state['skipread'].add(node)
2555 state['skipread'].add(node)
2552 else:
2556 else:
2553 # Side-effect: read content and verify hash.
2557 # Side-effect: read content and verify hash.
2554 self.revision(node)
2558 self.revision(node)
2555
2559
2556 l1 = self.rawsize(rev)
2560 l1 = self.rawsize(rev)
2557 l2 = len(self.revision(node, raw=True))
2561 l2 = len(self.revision(node, raw=True))
2558
2562
2559 if l1 != l2:
2563 if l1 != l2:
2560 yield revlogproblem(
2564 yield revlogproblem(
2561 error=_('unpacked size is %d, %d expected') % (l2, l1),
2565 error=_('unpacked size is %d, %d expected') % (l2, l1),
2562 node=node)
2566 node=node)
2563
2567
2564 except error.CensoredNodeError:
2568 except error.CensoredNodeError:
2565 if state['erroroncensored']:
2569 if state['erroroncensored']:
2566 yield revlogproblem(error=_('censored file data'),
2570 yield revlogproblem(error=_('censored file data'),
2567 node=node)
2571 node=node)
2568 state['skipread'].add(node)
2572 state['skipread'].add(node)
2569 except Exception as e:
2573 except Exception as e:
2570 yield revlogproblem(
2574 yield revlogproblem(
2571 error=_('unpacking %s: %s') % (short(node),
2575 error=_('unpacking %s: %s') % (short(node),
2572 stringutil.forcebytestr(e)),
2576 stringutil.forcebytestr(e)),
2573 node=node)
2577 node=node)
2574 state['skipread'].add(node)
2578 state['skipread'].add(node)
2575
2579
2576 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2580 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2577 revisionscount=False, trackedsize=False,
2581 revisionscount=False, trackedsize=False,
2578 storedsize=False):
2582 storedsize=False):
2579 d = {}
2583 d = {}
2580
2584
2581 if exclusivefiles:
2585 if exclusivefiles:
2582 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2586 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2583 if not self._inline:
2587 if not self._inline:
2584 d['exclusivefiles'].append((self.opener, self.datafile))
2588 d['exclusivefiles'].append((self.opener, self.datafile))
2585
2589
2586 if sharedfiles:
2590 if sharedfiles:
2587 d['sharedfiles'] = []
2591 d['sharedfiles'] = []
2588
2592
2589 if revisionscount:
2593 if revisionscount:
2590 d['revisionscount'] = len(self)
2594 d['revisionscount'] = len(self)
2591
2595
2592 if trackedsize:
2596 if trackedsize:
2593 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2597 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2594
2598
2595 if storedsize:
2599 if storedsize:
2596 d['storedsize'] = sum(self.opener.stat(path).st_size
2600 d['storedsize'] = sum(self.opener.stat(path).st_size
2597 for path in self.files())
2601 for path in self.files())
2598
2602
2599 return d
2603 return d
General Comments 0
You need to be logged in to leave comments. Login now