rust-cpython: using MissingAncestors from Python code...
Georges Racinet
r41283:a89b20a4 default
@@ -1,2635 +1,2637 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__  # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

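# An illustrative sketch of the (read, write, raw) contract documented in
# addflagprocessor() above. The transforms are identity functions and the
# choice of REVIDX_EXTSTORED as a target is hypothetical; a real flag must be
# listed in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER, and at most one
# processor may be registered per flag or _insertflagprocessor() raises.
#
#   def _identityread(rl, rawtext):
#       # True: the returned text is usable for hash integrity checking
#       return rawtext, True
#   def _identitywrite(rl, text):
#       return text, True
#   def _identityraw(rl, rawtext):
#       return True
#   addflagprocessor(REVIDX_EXTSTORED,
#                    (_identityread, _identitywrite, _identityraw))
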
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

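# Illustration of the packing above: the first field of an index entry keeps
# the data offset in the high 48 bits and the revision flags in the low
# 16 bits, so the two accessors invert offset_type():
#
#   >>> packed = offset_type(1048576, REVIDX_ISCENSORED)
#   >>> getoffset(packed) == 1048576 and gettype(packed) == REVIDX_ISCENSORED
#   True
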
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

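# The v0 record above is fixed-size: 4 * 4 bytes of integers plus 3 * 20
# bytes of nodeids, so indexformatv0.size == 76 and entry n starts at byte
# n * 76 in the index file. An illustrative unpack of the first raw entry
# (data is a placeholder for the index file contents):
#
#   >>> offset, clen, base, link, p1, p2, node = indexformatv0_unpack(data[:76])
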
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

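# The "ng" record above is 64 bytes: 8 (offset and flags packed by
# offset_type()) + 6 * 4 (lengths, base rev, link rev, parent revs) + 20
# (nodeid) + 12 bytes of padding from the "12x" suffix. For rev 0 the first
# 4 bytes double as the revlog header, which is why revlogio.packentry()
# below overwrites p[:4] with versionformat_pack(version).
#
#   >>> indexformatng.size
#   64
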
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

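    # Illustrative use of _deltachain() (rl and rev are placeholders): with
    # no stoprev the walk ends at the chain base, so the first element is
    # chainbase(rev), the last is rev itself, and stopped is False.
    # Rebuilding a fulltext means applying each delta of this ascending
    # chain on top of the base revision's text.
    #
    #   >>> chain, stopped = rl._deltachain(rev)
    #   >>> chain[0] == rl.chainbase(rev) and chain[-1] == rev and not stopped
    #   True
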
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustext is not None:
            lazyancestors = rustext.ancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

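    # The three backends selected above are expected to share the same lazy
    # iteration protocol, so callers need not care whether the Rust, C or
    # pure Python implementation answers. A sketch (rl and rev are
    # placeholders):
    #
    #   >>> for a in rl.ancestors([rev], inclusive=True):
    #   ...     pass  # ancestors of rev, rev included, higher revs first
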
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustext is not None:
            return rustext.ancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

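    # With this changeset, incrementalmissingrevs() is answered by the Rust
    # MissingAncestors class whenever the rustext module imported at the top
    # of this file is available, falling back to the Python implementation
    # otherwise; both are expected to answer the same calls. A usage sketch
    # (rl, commonrevs and headrevs are placeholders):
    #
    #   >>> inc = rl.incrementalmissingrevs(common=commonrevs)
    #   >>> inc.missingancestors(headrevs)  # sorted revs in ::heads - ::common
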
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
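    # Sketch of the relationship between the two methods above (illustrative
    # only; `rl`, `n1` and `n2` are hypothetical): findmissing() is the
    # node-level twin of findmissingrevs(), so the following round trip holds:
    #
    #   revs = rl.findmissingrevs(common=[rl.rev(n1)], heads=[rl.rev(n2)])
    #   nodes = rl.findmissing(common=[n1], heads=[n2])
    #   assert nodes == [rl.node(r) for r in revs]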
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
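    # Illustrative sketch of nodesbetween() (hypothetical nodes a and b on a
    # linear chain a -> x -> b in a revlog `rl`): the call
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[b])
    #
    # would yield nodes == [a, x, b] in topological order, outroots == [a]
    # and outheads == [b], since both endpoints count as their own ancestor
    # and descendant.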
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
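    # A minimal standalone sketch of the _headrevs() technique (assumed
    # parentrevs data, not tied to any revlog instance): every rev starts as
    # a head candidate and is cleared once it is seen as somebody's parent;
    # the extra trailing slot absorbs writes for nullrev (-1) parents.
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)} # 1 and 2 fork off 0
    #   ishead = [0] * (len(parents) + 1)
    #   for r in sorted(parents):
    #       ishead[r] = 1
    #       p1, p2 = parents[r]
    #       ishead[p1] = ishead[p2] = 0
    #   heads = [r for r, val in enumerate(ishead[:-1]) if val] # -> [1, 2]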
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)
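    # Why the early exits in isancestorrev() are sound (illustrative, using
    # only revlog invariants): revision numbers form a topological order, so
    # an ancestor always has a lower number than its descendants. Hence a > b
    # can never mean "a is an ancestor of b", e.g. for a hypothetical `rl`:
    #
    #   rl.isancestorrev(5, 3)  # False without any graph walk (5 > 3)
    #
    # The remaining case reduces to membership in the common-ancestor heads:
    # if a itself is one of commonancestorsheads(a, b), it is an ancestor of b.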
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
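    # Note on the tie-break above (illustrative): when several common
    # ancestors are equally good, min() over the binary node ids picks the
    # lexicographically smallest, e.g. with a hypothetical tied set:
    #
    #   ancs = {rev1, rev2}
    #   best = min(map(self.node, ancs))   # deterministic across machines
    #
    # The winner is arbitrary but stable, which keeps merge results
    # reproducible.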
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
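    # Illustrative behaviour of shortest() (hypothetical node and revlog `rl`):
    #
    #   prefix = rl.shortest(node)           # shortest unambiguous prefix
    #   assert rl.lookup(prefix) == node     # round-trips through lookup()
    #   rl.shortest(node, minlength=6)       # never shorter than 6 characters
    #
    # A prefix made entirely of 'f' characters is extended by one hex digit so
    # it cannot be confused with the virtual working-directory id.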
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d
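    # Worked example of the window arithmetic in _readsegment() (assumed
    # power-of-two cache size):
    #
    #   cachesize = 65536
    #   offset, length = 70000, 1000
    #   realoffset = offset & ~(cachesize - 1)              # 65536: round down
    #   realend = (offset + length + cachesize) & ~(cachesize - 1)   # 131072
    #   reallength = realend - realoffset                   # 65536
    #
    # so the read is widened to the aligned window [65536, 131072) and the
    # requested 1000 bytes are then sliced out at startoffset 4464.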
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)
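    # Sketch of the inline-revlog offset adjustment above (illustrative
    # numbers): in an inline revlog, each revision's data chunk is preceded
    # by its fixed-size index entry, so a data-only offset must be shifted by
    # one index-entry size per revision up to and including the target:
    #
    #   iosize = self._io.size     # fixed index entry size
    #   start = 120                # hypothetical data-only offset for rev 2
    #   start += (2 + 1) * iosize  # skip index entries for revs 0, 1 and 2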
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
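    # Illustrative contrast of the two delta layouts handled by deltaparent()
    # (hypothetical base values read from the index):
    #
    #   # generaldelta: the stored base IS the delta parent, so a rev may
    #   # delta against a rev several positions back:
    #   #   index[7][3] == 4  ->  deltaparent(7) == 4
    #   # pre-generaldelta: deltas always chain against the previous rev:
    #   #   index[7][3] == 4  ->  deltaparent(7) == 6
    #
    # A base equal to the rev itself marks a full text, hence nullrev.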
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
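    # Sketch of the recursion at the end of issnapshot() (hypothetical index
    # contents): with sparse-revlog, an intermediate snapshot deltas against
    # another snapshot rather than against a graph parent, so walking bases
    # eventually reaches a full text (base == rev or base == nullrev):
    #
    #   rev 9: base=6, parents=(8, -1)  -> base is not a parent, recurse on 6
    #   rev 6: base=-1                  -> full snapshot, so 9 is a snapshot
    #
    # Had rev 9's base been one of its parents, it would be an ordinary delta.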
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable because the transforms are not
        commutative.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
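    # Minimal sketch of a flag processor as consumed by _processflags() (the
    # helper names decode/encode are hypothetical; real processors are
    # registered through the flagprocessors mechanism, not defined here):
    #
    #   def _readtransform(rl, text):
    #       return decode(text), True    # (new text, validate hash?)
    #   def _writetransform(rl, text):
    #       return encode(text), True
    #   def _rawtransform(rl, text):
    #       return False                 # raw text hash is not checkable
    #
    #   processor = (_readtransform, _writetransform, _rawtransform)
    #
    # matching the (read, write, raw) unpacking performed in the loop above.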
1747
1749
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

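    # Illustrative sketch (not from the original source): verifying a stored
    # revision by hand, assuming `rl` is a revlog and `node` is stored in it:
    #
    #     rawtext = rl.revision(node, raw=True)
    #     rl.checkhash(rawtext, node)   # raises RevlogError on mismatch,
    #                                   # or CensoredNodeError for tombstones
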
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

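    # Illustrative sketch (not from the original source): the split condition
    # above asks whether the data portion has reached _maxinline bytes, since
    # start(tip) + length(tip) is the end offset of the last chunk. Assuming
    # `rl` is a revlog:
    #
    #     tiprev = len(rl) - 1
    #     datasize = rl.start(tiprev) + rl.length(tiprev)
    #     needs_split = rl._inline and datasize >= _maxinline
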
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

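    # Illustrative sketch (not from the original source): a minimal call,
    # assuming `rl` is a revlog and `tr` an open transaction; `linkrev`,
    # `p1node` and `p2node` are hypothetical values from the owning repository:
    #
    #     node = rl.addrevision(b'file contents\n', tr, linkrev,
    #                           p1node, p2node)
    #     assert node in rl.nodemap
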
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

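    # Illustrative sketch (not from the original source): compress() returns a
    # (header, data) 2-tuple rather than a single buffer, so callers can write
    # the pieces separately. The three outcomes above are:
    #
    #     ('', compressed)   # engine output, carries its own header byte
    #     ('', data)         # data starting with '\0' is stored literally
    #     ('u', data)        # explicit "uncompressed" marker prepended
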
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

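    # Illustrative sketch (not from the original source): round-tripping a
    # chunk through compress()/decompress(), assuming `rl` is a revlog:
    #
    #     header, payload = rl.compress(rawtext)
    #     chunk = header + payload          # what _writeentry() writes out
    #     assert bytes(rl.decompress(chunk)) == rawtext
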
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

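    # Illustrative sketch (not from the original source): the tuple appended
    # to self.index above can be unpacked as follows; the flag extraction
    # matches `entry[0] & 0xffff` used by clone() below, and the shift assumes
    # offset_type() packs the offset into the high bits:
    #
    #     (offset_flags, deltalen, textlen,
    #      base, linkrev, p1rev, p2rev, node) = rl.index[r]
    #     flags = offset_flags & 0xffff
    #     offset = offset_flags >> 16
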
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

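    # Illustrative sketch (not from the original source): each element of
    # `deltas` is the 7-tuple unpacked above; a caller would supply entries
    # shaped like this (values hypothetical):
    #
    #     deltas = [
    #         (node, p1, p2, linknode, deltabase, delta, flags),
    #     ]
    #     added = rl.addgroup(deltas, linkmapper, tr,
    #                         addrevisioncb=lambda rl, node: None)
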
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

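    # Illustrative sketch (not from the original source): strip() below
    # consumes this as follows; `broken` holds the revs whose linkrevs the
    # strip will break:
    #
    #     rev, broken = rl.getstrippoint(minlink)
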
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If not set, the destination revlog's current setting is
        used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

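    # Illustrative sketch (not from the original source): recomputing every
    # delta while copying, e.g. to re-encode a revlog. `srcrl`, `destrl` and
    # `tr` are hypothetical:
    #
    #     srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)
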
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

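    # Illustrative sketch (not from the original source): a minimal driver
    # for the generator above, assuming the ``revlogproblem`` instances
    # expose the `error`/`warning` values they are constructed with:
    #
    #     state = {'expectedversion': rl.version & 0xFFFF,
    #              'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         print(problem.error or problem.warning)
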
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
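
    # Illustrative sketch (not from the original source): querying storage
    # details, assuming `rl` is a revlog backed by real files:
    #
    #     info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #     # -> {'revisionscount': len(rl),
    #     #     'trackedsize': <sum of rawsize over all revs>}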