revlog: store mmaplargeindex as an instance attribute...
Gregory Szorc
r41239:d0de4fdd default
@@ -1,2630 +1,2631 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__ # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

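# Illustrative sketch (not part of the original module): the shape of a
# flag processor following the (read, write, raw) contract documented in
# addflagprocessor() above. The b'stub:' encoding is hypothetical. Here the
# hash is computed over the user-visible text, so "read" returns True while
# "write" and "raw" return False.
def _examplereadprocessor(rl, rawtext):
    # decode the stored form back into user-visible text; the result
    # matches the node hash, so it can be used for integrity checking
    return rawtext[len(b'stub:'):], True

def _examplewriteprocessor(rl, text):
    # encode user text into the stored form; the hash is generated from
    # "text", not from the returned rawtext
    return b'stub:' + text, False

def _examplerawprocessor(rl, rawtext):
    # the stored rawtext differs from the hashed text, so it cannot be
    # used for hash integrity checks
    return False

# An extension would then register the tuple on a flag it defines:
# addflagprocessor(REVIDX_EXTSTORED, (_examplereadprocessor,
#                                     _examplewriteprocessor,
#                                     _examplerawprocessor))
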
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

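# Illustrative sketch (not part of the original module): offset_type()
# packs a file offset into the high bits of the first index field and the
# 16 flag bits into the low bits; getoffset()/gettype() recover them.
def _exampleoffsettype():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
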
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

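# Illustrative sketch (not part of the original module): the v0 parser's
# return contract and revlogoldindex's special-cased null entry.
def _exampleoldindex():
    # parsing an empty v0 index yields no entries, a nodemap seeded with
    # the null node, and no chunk cache
    index, nodemap, cache = revlogoldio().parseindex(b'', False)
    assert list(index) == [] and cache is None
    assert nodemap == {nullid: nullrev}
    # index[-1] is the null revision, not the last list element
    assert index[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)
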
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

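# Illustrative sketch (not part of the original module): record sizes for
# the two index formats. A v0 record is 4*4 + 3*20 = 76 bytes; a v1 ("ng")
# record is 8 + 6*4 + 20 + 12 = 64 bytes, the nodeid being zero-padded
# from 20 to 32 bytes on disk.
def _exampleindexentry():
    assert indexformatv0.size == 76
    assert indexformatng.size == 64
    # a v1 entry: packed offset/flags, compressed and uncompressed
    # lengths, base rev, link rev, parent revs, and the 20-byte nodeid
    entry = (offset_type(0, 0), 11, 24, 0, 0, nullrev, nullrev, nullid)
    assert len(indexformatng_pack(*entry)) == 64
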
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
+       self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        mmapindexthreshold = None
        opts = getattr(opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            versionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            versionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                versionflags |= FLAG_GENERALDELTA
        else:
            versionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
-       if mmaplargeindex and 'mmapindexthreshold' in opts:
+       if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        self._loadindex(versionflags, mmapindexthreshold)

    def _loadindex(self, versionflags, mmapindexthreshold):
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustext is not None:
            lazyancestors = rustext.ancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

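    # For instance (illustrative, not from the original source): in a
    # linear history 0-1-2-3, findcommonmissing(common=[node(1)],
    # heads=[node(3)]) returns a "has" set covering ::1 and the missing
    # list [node(2), node(3)], matching the revset (::heads) - (::common).
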
879 def incrementalmissingrevs(self, common=None):
880 def incrementalmissingrevs(self, common=None):
880 """Return an object that can be used to incrementally compute the
881 """Return an object that can be used to incrementally compute the
881 revision numbers of the ancestors of arbitrary sets that are not
882 revision numbers of the ancestors of arbitrary sets that are not
882 ancestors of common. This is an ancestor.incrementalmissingancestors
883 ancestors of common. This is an ancestor.incrementalmissingancestors
883 object.
884 object.
884
885
885 'common' is a list of revision numbers. If common is not supplied, uses
886 'common' is a list of revision numbers. If common is not supplied, uses
886 nullrev.
887 nullrev.
887 """
888 """
888 if common is None:
889 if common is None:
889 common = [nullrev]
890 common = [nullrev]
890
891
891 return ancestor.incrementalmissingancestors(self.parentrevs, common)
892 return ancestor.incrementalmissingancestors(self.parentrevs, common)
892
893
893 def findmissingrevs(self, common=None, heads=None):
894 def findmissingrevs(self, common=None, heads=None):
894 """Return the revision numbers of the ancestors of heads that
895 """Return the revision numbers of the ancestors of heads that
895 are not ancestors of common.
896 are not ancestors of common.
896
897
897 More specifically, return a list of revision numbers corresponding to
898 More specifically, return a list of revision numbers corresponding to
898 nodes N such that every N satisfies the following constraints:
899 nodes N such that every N satisfies the following constraints:
899
900
900 1. N is an ancestor of some node in 'heads'
901 1. N is an ancestor of some node in 'heads'
901 2. N is not an ancestor of any node in 'common'
902 2. N is not an ancestor of any node in 'common'
902
903
903 The list is sorted by revision number, meaning it is
904 The list is sorted by revision number, meaning it is
904 topologically sorted.
905 topologically sorted.
905
906
906 'heads' and 'common' are both lists of revision numbers. If heads is
907 'heads' and 'common' are both lists of revision numbers. If heads is
907 not supplied, uses all of the revlog's heads. If common is not
908 not supplied, uses all of the revlog's heads. If common is not
908 supplied, uses nullid."""
909 supplied, uses nullid."""
909 if common is None:
910 if common is None:
910 common = [nullrev]
911 common = [nullrev]
911 if heads is None:
912 if heads is None:
912 heads = self.headrevs()
913 heads = self.headrevs()
913
914
914 inc = self.incrementalmissingrevs(common=common)
915 inc = self.incrementalmissingrevs(common=common)
915 return inc.missingancestors(heads)
916 return inc.missingancestors(heads)
916
917
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

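    # A hedged usage sketch (not in the original source; 'rl' and the node
    # IDs are assumptions). This is the node-level counterpart of
    # findmissingrevs, and conceptually what a pull asks first: "which
    # ancestors of the remote heads do I lack?"
    #
    #   missing = rl.findmissing(common=[localnode], heads=[remotenode])
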
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

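    # Illustrative call (not part of the original source; the names are
    # assumptions). Given a linear history a -> b -> c, asking for the path
    # between a and c returns all three nodes plus which roots and heads
    # were actually reachable:
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[c])
    #   # nodes == [a, b, c]; outroots == [a]; outheads == [c]
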
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

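    # Hedged examples (not in the original source; 'rl' and 'somenode' are
    # assumptions):
    #
    #   rl.heads()               # all childless nodes in the revlog
    #   rl.heads(start=somenode) # only heads descending from somenode
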
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

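    # Note on the scan above (added commentary, not in the original source):
    # a child always has a larger revision number than its parents, so
    # starting the walk at rev(node) + 1 is safe. A hedged usage sketch,
    # with assumed names:
    #
    #   kids = rl.children(somenode)   # [] when somenode is a head
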
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

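    # The early exits above lean on the topological ordering of revision
    # numbers: an ancestor always has a smaller revision number, so a > b
    # can never be an ancestor relationship. Hedged sketch (names are
    # assumptions):
    #
    #   rl.isancestorrev(2, 7)   # True iff rev 2 is reachable from rev 7
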
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

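    # Hedged sketch (not in the original source; names are assumptions):
    # when several merge-base candidates tie, min() over the binary node
    # hashes picks a deterministic winner so repeated runs agree.
    #
    #   base = rl.ancestor(node1, node2)   # nullid when unrelated
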
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

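    # Hedged sketch (not in the original source; the prefix value is an
    # assumed example). This is the kind of lookup that backs short-hash
    # display in user interfaces:
    #
    #   rl.shortest(node)               # e.g. 'a1b2' if 4 digits suffice
    #   rl.shortest(node, minlength=8)  # never shorter than 8 digits
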
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

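    # Worked example of the alignment arithmetic above (added commentary,
    # assuming a cache window of 65536 bytes): a request for 100 bytes at
    # offset 70000 is widened to a cache-aligned window:
    #
    #   realoffset = 70000 & ~(65536 - 1)                      # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so one aligned 64k block is read and cached, and the caller gets a
    # buffer view at startoffset = 70000 - 65536 = 4464 within it.
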
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is used, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

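    # Illustrative note on the inline adjustment above (added commentary):
    # in an inline revlog, index entries and data chunks are interleaved in
    # a single file, so the data for revision r sits after r + 1 index
    # entries. With an assumed 64-byte index entry size, a chunk that
    # logically starts at data offset 1000 for revision 3 is physically at
    # 1000 + (3 + 1) * 64 = 1256 in the index file.
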
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

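    # Illustrative summary (added commentary, not in the original source):
    # in a generaldelta revlog the stored base field names the delta parent
    # directly, so a delta may be against any earlier revision (often p1).
    # In the legacy layout deltas always chain against the previous
    # revision, hence the 'rev - 1' fallback. A base equal to rev means the
    # revision is stored whole, reported as a delta against nullrev.
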
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

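    # Summary of the recursion above (added commentary, not in the original
    # source): with sparse-revlog enabled, a revision is a snapshot either
    # when it is stored whole (base == rev or base == nullrev) or when it
    # is an intermediate snapshot, i.e. a delta whose base is itself a
    # snapshot but not one of the revision's parents. Deltas against p1 or
    # p2 are ordinary deltas, hence the 'return False' branch.
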
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

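    # Reading path in brief (added commentary, not in the original source):
    # a raw text is rebuilt by fetching the delta chain and folding it
    # together, roughly
    #
    #   chain, stopped = rl._deltachain(rev)   # base revision plus deltas
    #   bins = rl._chunks(chain)               # decompressed chunks
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # after which flag processors may transform rawtext into the final text.
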
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

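    # A hedged sketch of the processor shape unpacked above (added
    # commentary; the function names are assumptions). Each registered
    # processor is a 3-tuple of transforms: the read and write transforms
    # return (newtext, vhash), while the raw transform returns only vhash:
    #
    #   def _readsketch(rl, text):
    #       return text, True    # hypothetical no-op read transform
    #
    #   processor = (_readsketch, _writesketch, _rawsketch)
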
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

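    # Editor's note: _maxinline is the module-level threshold above which an
    # inline revlog (index and data interleaved in the ``.i`` file) is split
    # into separate ``.i``/``.d`` files; at this vintage it is believed to be
    # 131072 bytes (128 KiB).
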
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

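    # Illustrative usage (editor's sketch, not from the original file;
    # assumes an open transaction ``tr`` and parent nodeids ``p1``/``p2``
    # already known to revlog ``rl``, or nullid):
    #
    #   node = rl.addrevision(b'new file contents\n', tr, linkrev, p1, p2)
    #   assert node in rl.nodemap
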
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

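    # Editor's note: the empty-string and 'u' prefixes returned above are
    # the one-byte chunk headers that decompress() below dispatches on:
    # engine-compressed chunks carry their own header (e.g. 'x' for zlib),
    # literal chunks beginning with NUL are stored verbatim, and everything
    # else gets a 'u' ("uncompressed") prefix. A rough round trip, assuming
    # a revlog ``rl``:
    #
    #   header, rest = rl.compress(b'some data')
    #   assert bytes(rl.decompress(header + rest)) == b'some data'
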
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a default deltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

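    # Editor's note: the index entry ``e`` appended above is an 8-tuple:
    #
    #   (offset_type(offset, flags), compressed delta length, rawtext length,
    #    delta base rev, linkrev, p1 rev, p2 rev, nodeid)
    #
    # where offset_type() packs the data-file offset and the 16-bit flags
    # field into a single integer (offset << 16 | flags).
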
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

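    # Editor's note: each item of ``deltas`` unpacked above is the 7-tuple
    # (node, p1, p2, linknode, deltabase, delta, flags) that changegroup
    # unpacking produces; ``linkmapper`` turns the linknode into a local
    # changelog revision. A typical call (editor's sketch):
    #
    #   rl.addgroup(deltas, repo.changelog.rev, tr)
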
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

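    # Editor's note: ``del self.index[rev:-1]`` above deliberately stops one
    # short of the end because the index keeps a sentinel entry for nullrev
    # in its final slot, which must survive the truncation.
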
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When not set, the destination revlog's current setting is
        used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

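    # Illustrative usage (editor's sketch; ``src`` and ``dst`` are revlogs
    # and ``tr`` is an open transaction), e.g. an upgrade-style rewrite that
    # recomputes every delta:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
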
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex(self.version, None)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks that need to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

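    # Illustrative consumption (editor's sketch; the ``state`` keys shown
    # are the ones read above):
    #
    #   state = {'expectedversion': REVLOGV1, 'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       if problem.error:
    #           ui.warn(problem.error + b'\n')
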
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
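    # Illustrative usage (editor's sketch): callers ask only for the fields
    # they need, e.g.
    #
    #   rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # -> {'revisionscount': len(rl), 'trackedsize': sum of rawsize()}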