revlog: document that mmap resources are released implicitly by GC...
Yuya Nishihara
r41322:00b314c4 default
@@ -1,2639 +1,2641 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__  # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
      - (read)  f(self, rawtext) -> text, bool
      - (write) f(self, text) -> rawtext, bool
      - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, the "rawtext" returned by "write" should be used
    to generate the hash. Usually, "write" and "read" return different
    booleans, and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor
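
# An illustrative sketch (not part of revlog.py) of how an extension might
# register a flag processor. The no-op transforms below are hypothetical;
# a real processor must use a flag already defined in REVIDX_FLAGS_ORDER /
# REVIDX_KNOWN_FLAGS (REVIDX_EXTSTORED is used here only as an example):
#
#     def _read(self, rawtext):
#         return rawtext, True     # text == rawtext, usable for hashing
#     def _write(self, text):
#         return text, True        # rawtext == text, hash the rawtext
#     def _raw(self, rawtext):
#         return True              # rawtext usable for hash checks
#
#     addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))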

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
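
# Worked example (added for clarity): the first index field packs a byte
# offset into the upper 48 bits and the 16 flag bits into the lower 16, so
# offset_type(10, 0) == 10 << 16 == 655360, and round-tripping gives
# getoffset(655360) == 10 and gettype(655360) == 0.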

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
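
# Note (added for clarity): indexformatv0.size is 76 bytes per record --
# four 4-byte big-endian integers followed by three 20-byte nodeids.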

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
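
# Note (added for clarity): indexformatng.size is 64 bytes per record; for
# rev 0 the first 4 bytes on disk double as the version/flags header, which
# is why revlogio.packentry() below splices in versionformat_pack(version).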

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
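        # Note (added for clarity): n & (n - 1) clears the lowest set bit,
        # so the expression below is nonzero exactly when the cache size is
        # not a power of two.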
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
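
    # Illustrative example (hypothetical revs, not part of revlog.py): with
    # generaldelta and a delta chain 2 <- 5 <- 9 whose base is rev 2,
    # _deltachain(9) returns ([2, 5, 9], False), while
    # _deltachain(9, stoprev=5) returns ([9], True).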
792
794
793 def ancestors(self, revs, stoprev=0, inclusive=False):
795 def ancestors(self, revs, stoprev=0, inclusive=False):
794 """Generate the ancestors of 'revs' in reverse revision order.
796 """Generate the ancestors of 'revs' in reverse revision order.
795 Does not generate revs lower than stoprev.
797 Does not generate revs lower than stoprev.
796
798
797 See the documentation for ancestor.lazyancestors for more details."""
799 See the documentation for ancestor.lazyancestors for more details."""
798
800
799 # first, make sure start revisions aren't filtered
801 # first, make sure start revisions aren't filtered
800 revs = list(revs)
802 revs = list(revs)
801 checkrev = self.node
803 checkrev = self.node
802 for r in revs:
804 for r in revs:
803 checkrev(r)
805 checkrev(r)
804 # and we're sure ancestors aren't filtered as well
806 # and we're sure ancestors aren't filtered as well
805
807
806 if rustext is not None:
808 if rustext is not None:
807 lazyancestors = rustext.ancestor.LazyAncestors
809 lazyancestors = rustext.ancestor.LazyAncestors
808 arg = self.index
810 arg = self.index
809 elif util.safehasattr(parsers, 'rustlazyancestors'):
811 elif util.safehasattr(parsers, 'rustlazyancestors'):
810 lazyancestors = ancestor.rustlazyancestors
812 lazyancestors = ancestor.rustlazyancestors
811 arg = self.index
813 arg = self.index
812 else:
814 else:
813 lazyancestors = ancestor.lazyancestors
815 lazyancestors = ancestor.lazyancestors
814 arg = self._uncheckedparentrevs
816 arg = self._uncheckedparentrevs
815 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
817 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
816
818
817 def descendants(self, revs):
819 def descendants(self, revs):
818 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
820 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
819
821
820 def findcommonmissing(self, common=None, heads=None):
822 def findcommonmissing(self, common=None, heads=None):
821 """Return a tuple of the ancestors of common and the ancestors of heads
823 """Return a tuple of the ancestors of common and the ancestors of heads
822 that are not ancestors of common. In revset terminology, we return the
824 that are not ancestors of common. In revset terminology, we return the
823 tuple:
825 tuple:
824
826
825 ::common, (::heads) - (::common)
827 ::common, (::heads) - (::common)
826
828
827 The list is sorted by revision number, meaning it is
829 The list is sorted by revision number, meaning it is
828 topologically sorted.
830 topologically sorted.
829
831
830 'heads' and 'common' are both lists of node IDs. If heads is
832 'heads' and 'common' are both lists of node IDs. If heads is
831 not supplied, uses all of the revlog's heads. If common is not
833 not supplied, uses all of the revlog's heads. If common is not
832 supplied, uses nullid."""
834 supplied, uses nullid."""
833 if common is None:
835 if common is None:
834 common = [nullid]
836 common = [nullid]
835 if heads is None:
837 if heads is None:
836 heads = self.heads()
838 heads = self.heads()
837
839
838 common = [self.rev(n) for n in common]
840 common = [self.rev(n) for n in common]
839 heads = [self.rev(n) for n in heads]
841 heads = [self.rev(n) for n in heads]
840
842
841 # we want the ancestors, but inclusive
843 # we want the ancestors, but inclusive
842 class lazyset(object):
844 class lazyset(object):
843 def __init__(self, lazyvalues):
845 def __init__(self, lazyvalues):
844 self.addedvalues = set()
846 self.addedvalues = set()
845 self.lazyvalues = lazyvalues
847 self.lazyvalues = lazyvalues
846
848
847 def __contains__(self, value):
849 def __contains__(self, value):
848 return value in self.addedvalues or value in self.lazyvalues
850 return value in self.addedvalues or value in self.lazyvalues
849
851
850 def __iter__(self):
852 def __iter__(self):
851 added = self.addedvalues
853 added = self.addedvalues
852 for r in added:
854 for r in added:
853 yield r
855 yield r
854 for r in self.lazyvalues:
856 for r in self.lazyvalues:
855 if not r in added:
857 if not r in added:
856 yield r
858 yield r
857
859
858 def add(self, value):
860 def add(self, value):
859 self.addedvalues.add(value)
861 self.addedvalues.add(value)
860
862
861 def update(self, values):
863 def update(self, values):
862 self.addedvalues.update(values)
864 self.addedvalues.update(values)
863
865
864 has = lazyset(self.ancestors(common))
866 has = lazyset(self.ancestors(common))
865 has.add(nullrev)
867 has.add(nullrev)
866 has.update(common)
868 has.update(common)
867
869
868 # take all ancestors from heads that aren't in has
870 # take all ancestors from heads that aren't in has
869 missing = set()
871 missing = set()
870 visit = collections.deque(r for r in heads if r not in has)
872 visit = collections.deque(r for r in heads if r not in has)
871 while visit:
873 while visit:
872 r = visit.popleft()
874 r = visit.popleft()
873 if r in missing:
875 if r in missing:
874 continue
876 continue
875 else:
877 else:
876 missing.add(r)
878 missing.add(r)
877 for p in self.parentrevs(r):
879 for p in self.parentrevs(r):
878 if p not in has:
880 if p not in has:
879 visit.append(p)
881 visit.append(p)
880 missing = list(missing)
882 missing = list(missing)
881 missing.sort()
883 missing.sort()
882 return has, [self.node(miss) for miss in missing]
884 return has, [self.node(miss) for miss in missing]
883
885
884 def incrementalmissingrevs(self, common=None):
886 def incrementalmissingrevs(self, common=None):
885 """Return an object that can be used to incrementally compute the
887 """Return an object that can be used to incrementally compute the
886 revision numbers of the ancestors of arbitrary sets that are not
888 revision numbers of the ancestors of arbitrary sets that are not
887 ancestors of common. This is an ancestor.incrementalmissingancestors
889 ancestors of common. This is an ancestor.incrementalmissingancestors
888 object.
890 object.
889
891
890 'common' is a list of revision numbers. If common is not supplied, uses
892 'common' is a list of revision numbers. If common is not supplied, uses
891 nullrev.
893 nullrev.
892 """
894 """
893 if common is None:
895 if common is None:
894 common = [nullrev]
896 common = [nullrev]
895
897
896 if rustext is not None:
898 if rustext is not None:
897 return rustext.ancestor.MissingAncestors(self.index, common)
899 return rustext.ancestor.MissingAncestors(self.index, common)
898 return ancestor.incrementalmissingancestors(self.parentrevs, common)
900 return ancestor.incrementalmissingancestors(self.parentrevs, common)
899
901
900 def findmissingrevs(self, common=None, heads=None):
902 def findmissingrevs(self, common=None, heads=None):
901 """Return the revision numbers of the ancestors of heads that
903 """Return the revision numbers of the ancestors of heads that
902 are not ancestors of common.
904 are not ancestors of common.
903
905
904 More specifically, return a list of revision numbers corresponding to
906 More specifically, return a list of revision numbers corresponding to
905 nodes N such that every N satisfies the following constraints:
907 nodes N such that every N satisfies the following constraints:
906
908
907 1. N is an ancestor of some node in 'heads'
909 1. N is an ancestor of some node in 'heads'
908 2. N is not an ancestor of any node in 'common'
910 2. N is not an ancestor of any node in 'common'
909
911
910 The list is sorted by revision number, meaning it is
912 The list is sorted by revision number, meaning it is
911 topologically sorted.
913 topologically sorted.
912
914
913 'heads' and 'common' are both lists of revision numbers. If heads is
915 'heads' and 'common' are both lists of revision numbers. If heads is
914 not supplied, uses all of the revlog's heads. If common is not
916 not supplied, uses all of the revlog's heads. If common is not
915 supplied, uses nullid."""
917 supplied, uses nullid."""
916 if common is None:
918 if common is None:
917 common = [nullrev]
919 common = [nullrev]
918 if heads is None:
920 if heads is None:
919 heads = self.headrevs()
921 heads = self.headrevs()
920
922
921 inc = self.incrementalmissingrevs(common=common)
923 inc = self.incrementalmissingrevs(common=common)
922 return inc.missingancestors(heads)
924 return inc.missingancestors(heads)
923
925
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

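    # A minimal usage sketch, assuming a revlog ``rl`` and binary node ids
    # ``a`` and ``b`` (hypothetical names):
    #
    #   nodes = rl.findmissing(common=[a], heads=[b])
    #
    # yields the ancestors of b (including b itself) that are not ancestors
    # of a, sorted by revision number.
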
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

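    # Sketch of the return shape, assuming a revlog ``rl`` with a root node
    # ``r`` and a head node ``h`` (hypothetical names):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([r], [h])
    #
    # ``nodes`` is topologically sorted and includes r and h themselves when
    # reachable; ``outroots``/``outheads`` are the subsets of the inputs
    # that made it into ``nodes``.
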
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        return dagop.headrevs(revs, self.parentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

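    # Usage sketch, assuming a revlog ``rl`` and nodes ``n`` and ``m``
    # (hypothetical names):
    #
    #   rl.heads()            # all childless nodes
    #   rl.heads(start=n)     # only heads that descend from n
    #   rl.heads(stop=[m])    # treats the revs from m as if childless
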
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

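    # Note on the ``a > b`` fast path above: revision numbers are assigned
    # in topological order, so an ancestor can never have a higher revision
    # number than its descendant. The comparison therefore rules that case
    # out before the more expensive common-ancestor computation runs.
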
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

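    # Accepted identifier forms, assuming a revlog ``rl``, a 20-byte node
    # ``n``, and the sample prefix below (hypothetical values):
    #
    #   rl.lookup(0)        # revision number
    #   rl.lookup('0')      # str(revision number)
    #   rl.lookup(n)        # binary nodeid
    #   rl.lookup('1f0e')   # unambiguous hex prefix; an ambiguous prefix
    #                       # raises AmbiguousPrefixLookupError
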
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

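    # Usage sketch, assuming a revlog ``rl`` and a node ``n`` (hypothetical
    # names):
    #
    #   prefix = rl.shortest(n, minlength=4)
    #
    # the result satisfies hex(n).startswith(prefix) and is long enough
    # that rl.lookup(prefix) resolves to n unambiguously.
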
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

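    # Worked example of the window rounding above, assuming a power-of-two
    # _chunkcachesize of 65536 and a request for offset=70000, length=100:
    #
    #   realoffset = 70000 & ~65535                            -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  -> 65536
    #
    # so one full 64k window starting at 65536 is read and cached, and the
    # requested 100 bytes are returned as a buffer into it.
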
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

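    # For inline revlogs, revision data is interleaved with 64-byte v1 index
    # entries, hence the (rev + 1) * self._io.size adjustments above. E.g.
    # revision 2 with a logical start offset of 100 physically begins at
    # 100 + 3 * 64 = 292 bytes into the index file.
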
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

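    # Delta-base sketch: with generaldelta the index base field names the
    # actual delta parent (often one of the revision's parents); without it,
    # deltas always chain to the previous revision. For a hypothetical rev 5
    # whose base field is 3:
    #
    #   rl.deltaparent(5)   # -> 3 with generaldelta, 4 without
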
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method so the attribute test and access
            # are cached
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

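    # Usage sketch, assuming a revlog ``rl`` and a node ``n`` (hypothetical
    # names):
    #
    #   text = rl.revision(n)               # fulltext, flag transforms run
    #   rawtext = rl.revision(n, raw=True)  # stored rawtext, e.g. for
    #                                       # changegroup generation
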
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

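    # A flag processor is a (readtransform, writetransform, rawtransform)
    # triple, keyed by its flag bit in self._flagprocessors. A minimal
    # sketch, assuming a hypothetical REVIDX_EXAMPLE bit and placeholder
    # ``decode``/``encode`` helpers:
    #
    #   def readtf(self, text):
    #       return decode(text), True       # (text, validatehash)
    #   def writetf(self, text):
    #       return encode(text), True
    #   def rawtf(self, text):
    #       return False                    # rawtext hash is not valid
    #
    #   self._flagprocessors[REVIDX_EXAMPLE] = (readtf, writetf, rawtf)
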
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

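    # [usage sketch, hypothetical names] Verify that stored raw text matches
    # the recorded node; a mismatch raises RevlogError (or CensoredNodeError
    # for censored revisions):
    #
    #   node = rl.node(rev)
    #   rl.checkhash(rl.revision(node, raw=True), node, rev=rev)
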
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

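    # [usage sketch, hypothetical names] Assuming ``rl`` is an open revlog
    # and ``tr`` an active transaction, and that no flags or metadata are
    # involved, the returned node is hash(text, p1, p2) and the text round-
    # trips:
    #
    #   node = rl.addrevision(b'file content', tr, linkrev, p1node, p2node)
    #   assert rl.revision(node) == b'file content'
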
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

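    # [round-trip sketch, hypothetical ``rl``/``chunk``] compress() returns a
    # (header, data) pair whose concatenation decompress() accepts; the
    # one-byte header routes to zlib ('x'), raw-but-safe ('\0'), or
    # uncompressed ('u'):
    #
    #   header, packed = rl.compress(chunk)
    #   assert bytes(rl.decompress(header + packed)) == chunk
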
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

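    # [reference sketch] The 8-tuple appended to the index above is, in
    # order: packed offset+flags, length of the stored (possibly delta'd)
    # chunk, length of the full rawtext, delta base revision, linkrev,
    # parent 1 rev, parent 2 rev, and node. Unpacking one entry
    # (hypothetical ``rl``):
    #
    #   offset_flags, deltalen, textlen, base, link, p1r, p2r, node = \
    #       rl.index[rev]
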
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

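    # [standalone sketch of the a+ workaround above, not revlog-specific]
    # When one handle mixes reads and appends, reposition to EOF before each
    # write (``path`` and ``entry`` are hypothetical):
    #
    #   fh = open(path, 'ab+')
    #   fh.read(16)                # a read may move the file position
    #   fh.seek(0, os.SEEK_END)    # reposition before appending
    #   fh.write(entry)
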
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

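    # [usage sketch, hypothetical names] Each element of ``deltas`` is the
    # 7-tuple unpacked in the loop above, and ``linkmapper`` translates a
    # changelog node to a linkrev (e.g. a changelog's ``rev`` method):
    #
    #   deltas = [(node, p1, p2, linknode, deltabase, delta, flags)]
    #   added = rl.addgroup(deltas, cl.rev, tr,
    #                       addrevisioncb=lambda rl, node: None)
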
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

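    # [usage sketch, hypothetical ``rl``/``tr``] Callers typically consult
    # getstrippoint() first to learn which revisions will lose their
    # linkrevs, then truncate:
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   rl.strip(minlink, tr)
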
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

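    # [interpretation sketch] checksize() returns (dd, di): ``dd`` is the
    # difference between the data file's actual and expected sizes and
    # ``di`` the number of stray bytes in the index; (0, 0) means both files
    # are consistent with the index contents:
    #
    #   dd, di = rl.checksize()   # ``rl`` is a hypothetical revlog
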
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
            deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If it is None, the current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

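    # [usage sketch, hypothetical names] Copy a revlog into an empty one
    # while recomputing every delta, e.g. after a delta algorithm change:
    #
    #   srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)
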
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

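    # [usage sketch, hypothetical names] Replace one revision's content with
    # a tombstone; the rewritten files are swapped in when the transaction
    # closes:
    #
    #   rl.censorrevision(tr, censorednode, tombstone=b'removed by admin')
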
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

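    # [usage sketch, hypothetical values] ``state`` must at least carry
    # 'expectedversion' and 'erroroncensored'; problems are yielded lazily:
    #
    #   state = {'expectedversion': 1, 'erroroncensored': False}
    #   for problem in rl.verifyintegrity(state):
    #       print(problem.error or problem.warning)
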
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
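
    # [usage sketch, hypothetical ``rl``] Request only the fields you need;
    # keys for unrequested fields are omitted from the returned dict:
    #
    #   info = rl.storageinfo(trackedsize=True, storedsize=True)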