revlog: use iterbytestr()...
Gregory Szorc - r41838:f63ba0b9 default
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# Blanket usage of all the names to silence pyflakes unused-import warnings.
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__  # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

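# A flag processor is a (read, write, raw) triple of callables, like
# ``ellipsisprocessor`` above. A minimal sketch of how the three pieces are
# used (illustrative only; the real dispatch lives in revlog._processflags):
#
#     readfn, writefn, rawfn = ellipsisprocessor
#     text, validatehash = readfn(rl, rawtext)    # storage form -> user text
#     rawtext, validatehash = writefn(rl, text)   # user text -> storage form
#     validatehash = rawfn(rl, rawtext)           # may rawtext verify hashes?
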
def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

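# A hedged sketch of what an extension's registration looks like.
# REVIDX_EXAMPLE is hypothetical: a real flag must already be listed in
# REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER (and in
# REVIDX_RAWTEXT_CHANGING_FLAGS if it can alter rawtext):
#
#     def examplereadproc(rl, rawtext):
#         return decode(rawtext), True     # decode() is made up
#
#     def examplewriteproc(rl, text):
#         return encode(text), True        # encode() is made up
#
#     def examplerawproc(rl, rawtext):
#         return True
#
#     addflagprocessor(REVIDX_EXAMPLE,
#                      (examplereadproc, examplewriteproc, examplerawproc))
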
def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

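# The first entry of an index tuple packs the data offset and the flag bits
# into one integer: the offset lives in the high bits, the low 16 bits hold
# the flags. An illustrative round-trip (the offset value is arbitrary):
#
#     q = offset_type(4096, REVIDX_ISCENSORED)
#     assert getoffset(q) == 4096
#     assert gettype(q) == REVIDX_ISCENSORED
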
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

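# Each v0 index record is indexformatv0.size (76) bytes: four big-endian
# 32-bit integers followed by three 20-byte binary nodeids. An illustrative
# round-trip (p1node, p2node and node stand for 20-byte hashes):
#
#     data = indexformatv0_pack(offset, clen, baserev, linkrev,
#                               p1node, p2node, node)
#     offset, clen, baserev, linkrev, p1, p2, n = indexformatv0_unpack(data)
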
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

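# A v1 ("ng") record is indexformatng.size (64) bytes: one 64-bit
# offset/flags word, six 32-bit integers, a 20-byte nodeid and 12 bytes of
# padding (reserving 32 bytes for wider hashes). An illustrative pack, with
# made-up field values:
#
#     e = (offset_type(0, 0), clen, ulen, baserev, linkrev,
#          p1rev, p2rev, node)
#     record = indexformatng_pack(*e)
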
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

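# Note that a revlog has no standalone header: for rev 0, packentry()
# overwrites the high 4 bytes of the offset field (always zero for the first
# entry) with the version/flags word. Readers recover it the same way
# _loadindex() below does:
#
#     versionflags = versionformat_unpack(indexdata[:4])[0]
#     fmt = versionflags & 0xFFFF      # e.g. REVLOGV1
#     flags = versionflags & ~0xFFFF   # e.g. FLAG_INLINE_DATA
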
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecache are the same
            # object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

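    # For example, with generaldelta the delta parent is read from the index
    # (e[3]), so a chain may skip around: 7 -> 4 -> 2, where rev 2 is its own
    # base, yields ([2, 4, 7], False). Without generaldelta the walk simply
    # steps back one revision at a time until it reaches the chain base.
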
806 def ancestors(self, revs, stoprev=0, inclusive=False):
806 def ancestors(self, revs, stoprev=0, inclusive=False):
807 """Generate the ancestors of 'revs' in reverse revision order.
807 """Generate the ancestors of 'revs' in reverse revision order.
808 Does not generate revs lower than stoprev.
808 Does not generate revs lower than stoprev.
809
809
810 See the documentation for ancestor.lazyancestors for more details."""
810 See the documentation for ancestor.lazyancestors for more details."""
811
811
812 # first, make sure start revisions aren't filtered
812 # first, make sure start revisions aren't filtered
813 revs = list(revs)
813 revs = list(revs)
814 checkrev = self.node
814 checkrev = self.node
815 for r in revs:
815 for r in revs:
816 checkrev(r)
816 checkrev(r)
817 # and we're sure ancestors aren't filtered as well
817 # and we're sure ancestors aren't filtered as well
818
818
819 if rustext is not None:
819 if rustext is not None:
820 lazyancestors = rustext.ancestor.LazyAncestors
820 lazyancestors = rustext.ancestor.LazyAncestors
821 arg = self.index
821 arg = self.index
822 elif util.safehasattr(parsers, 'rustlazyancestors'):
822 elif util.safehasattr(parsers, 'rustlazyancestors'):
823 lazyancestors = ancestor.rustlazyancestors
823 lazyancestors = ancestor.rustlazyancestors
824 arg = self.index
824 arg = self.index
825 else:
825 else:
826 lazyancestors = ancestor.lazyancestors
826 lazyancestors = ancestor.lazyancestors
827 arg = self._uncheckedparentrevs
827 arg = self._uncheckedparentrevs
828 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
828 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
829
829
830 def descendants(self, revs):
830 def descendants(self, revs):
831 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
831 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
832
832
833 def findcommonmissing(self, common=None, heads=None):
833 def findcommonmissing(self, common=None, heads=None):
834 """Return a tuple of the ancestors of common and the ancestors of heads
834 """Return a tuple of the ancestors of common and the ancestors of heads
835 that are not ancestors of common. In revset terminology, we return the
835 that are not ancestors of common. In revset terminology, we return the
836 tuple:
836 tuple:
837
837
838 ::common, (::heads) - (::common)
838 ::common, (::heads) - (::common)
839
839
840 The list is sorted by revision number, meaning it is
840 The list is sorted by revision number, meaning it is
841 topologically sorted.
841 topologically sorted.
842
842
843 'heads' and 'common' are both lists of node IDs. If heads is
843 'heads' and 'common' are both lists of node IDs. If heads is
844 not supplied, uses all of the revlog's heads. If common is not
844 not supplied, uses all of the revlog's heads. If common is not
845 supplied, uses nullid."""
845 supplied, uses nullid."""
846 if common is None:
846 if common is None:
847 common = [nullid]
847 common = [nullid]
848 if heads is None:
848 if heads is None:
849 heads = self.heads()
849 heads = self.heads()
850
850
851 common = [self.rev(n) for n in common]
851 common = [self.rev(n) for n in common]
852 heads = [self.rev(n) for n in heads]
852 heads = [self.rev(n) for n in heads]
853
853
854 # we want the ancestors, but inclusive
854 # we want the ancestors, but inclusive
855 class lazyset(object):
855 class lazyset(object):
856 def __init__(self, lazyvalues):
856 def __init__(self, lazyvalues):
857 self.addedvalues = set()
857 self.addedvalues = set()
858 self.lazyvalues = lazyvalues
858 self.lazyvalues = lazyvalues
859
859
860 def __contains__(self, value):
860 def __contains__(self, value):
861 return value in self.addedvalues or value in self.lazyvalues
861 return value in self.addedvalues or value in self.lazyvalues
862
862
863 def __iter__(self):
863 def __iter__(self):
864 added = self.addedvalues
864 added = self.addedvalues
865 for r in added:
865 for r in added:
866 yield r
866 yield r
867 for r in self.lazyvalues:
867 for r in self.lazyvalues:
868 if not r in added:
868 if not r in added:
869 yield r
869 yield r
870
870
871 def add(self, value):
871 def add(self, value):
872 self.addedvalues.add(value)
872 self.addedvalues.add(value)
873
873
874 def update(self, values):
874 def update(self, values):
875 self.addedvalues.update(values)
875 self.addedvalues.update(values)
876
876
877 has = lazyset(self.ancestors(common))
877 has = lazyset(self.ancestors(common))
878 has.add(nullrev)
878 has.add(nullrev)
879 has.update(common)
879 has.update(common)
880
880
881 # take all ancestors from heads that aren't in has
881 # take all ancestors from heads that aren't in has
882 missing = set()
882 missing = set()
883 visit = collections.deque(r for r in heads if r not in has)
883 visit = collections.deque(r for r in heads if r not in has)
884 while visit:
884 while visit:
885 r = visit.popleft()
885 r = visit.popleft()
886 if r in missing:
886 if r in missing:
887 continue
887 continue
888 else:
888 else:
889 missing.add(r)
889 missing.add(r)
890 for p in self.parentrevs(r):
890 for p in self.parentrevs(r):
891 if p not in has:
891 if p not in has:
892 visit.append(p)
892 visit.append(p)
893 missing = list(missing)
893 missing = list(missing)
894 missing.sort()
894 missing.sort()
895 return has, [self.node(miss) for miss in missing]
895 return has, [self.node(miss) for miss in missing]
896
896
897 def incrementalmissingrevs(self, common=None):
897 def incrementalmissingrevs(self, common=None):
898 """Return an object that can be used to incrementally compute the
898 """Return an object that can be used to incrementally compute the
899 revision numbers of the ancestors of arbitrary sets that are not
899 revision numbers of the ancestors of arbitrary sets that are not
900 ancestors of common. This is an ancestor.incrementalmissingancestors
900 ancestors of common. This is an ancestor.incrementalmissingancestors
901 object.
901 object.
902
902
903 'common' is a list of revision numbers. If common is not supplied, uses
903 'common' is a list of revision numbers. If common is not supplied, uses
904 nullrev.
904 nullrev.
905 """
905 """
906 if common is None:
906 if common is None:
907 common = [nullrev]
907 common = [nullrev]
908
908
909 if rustext is not None:
909 if rustext is not None:
910 return rustext.ancestor.MissingAncestors(self.index, common)
910 return rustext.ancestor.MissingAncestors(self.index, common)
911 return ancestor.incrementalmissingancestors(self.parentrevs, common)
911 return ancestor.incrementalmissingancestors(self.parentrevs, common)
912
912
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

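    # Hypothetical example contrasting the two methods above (``rl`` is any
    # revlog instance): findmissingrevs() speaks revision numbers while
    # findmissing() speaks binary node IDs, but both answer the same
    # ancestry question:
    #
    #   revs = rl.findmissingrevs(common=[2], heads=[5])
    #   nodes = rl.findmissing(common=[rl.node(2)], heads=[rl.node(5)])
    #   assert nodes == [rl.node(r) for r in revs]
    #
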
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is; real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

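    # Hypothetical usage sketch for nodesbetween() (``rl`` is any revlog
    # instance): select everything reachable from one root up to the current
    # heads, together with the roots and heads actually used:
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[rl.node(0)])
    #   assert rl.node(0) in outroots
    #   assert set(outheads) <= set(nodes)
    #
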
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        return dagop.headrevs(revs, self.parentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

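    # The pure-Python fallback above starts by assuming every revision is a
    # head and then clears the mark for each revision's parents (index entry
    # slots 5 and 6). A self-contained sketch of the same idea over an
    # explicit parent map (hypothetical, for illustration only):
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}  # 1 and 2 are heads
    #   ishead = dict.fromkeys(parents, True)
    #   for r, (p1, p2) in parents.items():
    #       ishead.pop(p1, None)  # a revision with a child is not a head
    #       ishead.pop(p2, None)
    #   assert sorted(ishead) == [1, 2]
    #
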
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned.
        if stop is specified, the revisions in stop are treated as if
        they had no children.
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

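    # children() only needs to scan revisions after ``p`` because a revlog is
    # append-only and parents always precede their children; e.g. for a node
    # at rev 3, only revs 4..tip are inspected:
    #
    #   kids = rl.children(rl.node(3))  # hypothetical usage, ``rl`` a revlog
    #
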
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

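    # The early returns above lean on the topological ordering of revision
    # numbers: nullrev is an ancestor of everything, every revision is its
    # own ancestor, and an ancestor can never have a larger revision number
    # than its descendant. E.g. (hypothetical, ``rl`` a revlog instance):
    #
    #   rl.isancestorrev(nullrev, 5)  # True, cheap
    #   rl.isancestorrev(5, 5)        # True, cheap
    #   rl.isancestorrev(6, 5)        # False, cheap
    #   rl.isancestorrev(2, 5)        # falls back to commonancestorsheads
    #
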
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

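    # Hypothetical examples of identifier forms accepted by lookup() (``rl``
    # is a revlog instance, ``n`` one of its nodes):
    #
    #   rl.lookup(0)            # revision number
    #   rl.lookup(b'0')         # str(revision number)
    #   rl.lookup(n)            # 20-byte binary node
    #   rl.lookup(hex(n))       # full 40-char hex node
    #   rl.lookup(hex(n)[:8])   # unambiguous hex prefix
    #
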
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

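    # Hypothetical usage sketch: the returned prefix is the shortest hex
    # string that _partialmatch() resolves to exactly one node and that
    # cannot be confused with the all-'f' wdir pseudo-identifier:
    #
    #   prefix = rl.shortest(n)         # e.g. b'1f0'; ``rl`` a revlog
    #   assert rl.lookup(prefix) == n   # round-trips
    #
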
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, a seek will be performed on it
        and the original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

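    # Worked example of the window arithmetic above, assuming a chunk cache
    # size of 64 KiB (cachesize = 65536, a power of two):
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                            # 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # 65536
    #
    # so one aligned 64 KiB window covering the request is read and cached,
    # and the caller receives a buffer sliced out of it at
    # startoffset = 70000 - 65536 = 4464.
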
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

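    # In an inline revlog the index entries and data chunks are interleaved
    # in one file, so revision data starts (rev + 1) index-entry sizes past
    # its nominal offset. Assuming the 64-byte v1 entry size, data for rev 2
    # whose nominal start is 100 actually begins at 100 + (2 + 1) * 64 = 292.
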
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

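    # Summary of the classification above (hypothetical delta graph): a
    # revision whose delta base is one of its parents stores an ordinary
    # delta, while a base of nullrev or the revision itself means full text:
    #
    #   base == rev or base == nullrev  -> snapshot (full text)
    #   base in (p1, p2)                -> not a snapshot (regular delta)
    #   otherwise                       -> snapshot iff base is a snapshot
    #
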
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable to support
        non-commutative transforms.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update the result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

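    # Hedged sketch of the processor shape consumed above: each entry of
    # ``self._flagprocessors`` maps a flag bit to a 3-tuple
    # (readtransform, writetransform, rawtransform). A no-op processor could
    # look like this (hypothetical names, not part of the module):
    #
    #   def _noopread(rl, text):
    #       return text, True      # (possibly transformed text, validatehash)
    #   def _noopwrite(rl, text):
    #       return text, True
    #   def _noopraw(rl, text):
    #       return True            # raw transform only reports validatehash
    #
    #   processor = (_noopread, _noopwrite, _noopraw)
    #
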
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

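    # Illustrative sketch (not part of this module): the node hash ties the
    # revision text to its parents, so checkhash() fails if any of the three
    # changes. Assuming a revlog instance ``rl`` with at least one revision:
    #
    #   node = rl.node(0)
    #   text = rl.revision(node, raw=True)
    #   p1, p2 = rl.parents(node)
    #   assert node == rl.hash(text, p1, p2)
    #   rl.checkhash(text, node, p1=p1, p2=p2)   # passes silently
    #   rl.checkhash(text + b'x', node)          # raises error.RevlogError
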
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

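    # Illustrative sketch (not part of this module): a typical addrevision()
    # call site, assuming an open transaction ``tr``, a revlog ``rl`` and
    # known linkrev/parent values. The returned node is hash(text, p1, p2)
    # unless flags or a subclass change the hashing:
    #
    #   node = rl.addrevision(b'file content', tr, linkrev, p1node, p2node)
    #   assert node in rl.nodemap
    #
    # Adding identical content twice is a no-op: the existing node is
    # returned as soon as it is found in self.nodemap.
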
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

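    # Illustrative sketch (not part of this module): compress() returns a
    # (header, data) pair and decompress() dispatches on the first byte of
    # the stored chunk, so a round trip looks like:
    #
    #   header, data = rl.compress(b'some revision text')
    #   stored = header + data            # what ends up in the data file
    #   assert bytes(rl.decompress(stored)) == b'some revision text'
    #
    # 'u' marks chunks stored uncompressed, '\0' (and the empty chunk) means
    # the raw data is used as-is, and any other leading byte is treated as a
    # compression engine header, such as 'x' for zlib.
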
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        self._enforceinlinesize(transaction, ifh)

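    # Illustrative sketch (not part of this module): the a+ mode workaround
    # above boils down to "always reposition to EOF before appending",
    # because an earlier read may have moved the shared file offset:
    #
    #   fh = open(path, 'ab+')    # hypothetical handle reused for reads
    #   fh.read(16)               # offset is now 16, not EOF
    #   fh.seek(0, os.SEEK_END)   # restore the append position explicitly
    #   fh.write(entry)
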
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

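    # Illustrative sketch (not part of this module): each entry consumed by
    # addgroup() is the 7-tuple unpacked in the loop above. A hypothetical
    # caller applying one full-snapshot delta against the null revision could
    # look like:
    #
    #   delta = mdiff.trivialdiffheader(len(fulltext)) + fulltext
    #   deltas = [(node, p1, p2, linknode, nullid, delta, 0)]
    #   rl.addgroup(deltas, cl.rev, tr)
    #
    # with ``cl.rev`` being a hypothetical linkmapper that maps changelog
    # nodes to linkrevs.
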
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

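    # Illustrative sketch (not part of this module): getstrippoint() answers
    # "how far would we have to truncate?" without modifying anything:
    #
    #   minrev, brokenset = rl.getstrippoint(minlink)
    #   # every rev >= minrev is removed by strip(minlink, tr); brokenset
    #   # holds the revs whose linkrev >= minlink, i.e. the revisions whose
    #   # linkrevs the strip will break.
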
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Compare recorded sizes with the sizes of the files on disk.

        Returns a (dd, di) tuple, where dd is the number of bytes by which
        the data length is off and di is the number of extra bytes in the
        index file; both are 0 when the revlog is consistent.
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If None, the destination revlog's existing setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

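    # Illustrative sketch (not part of this module): a format-upgrading copy,
    # assuming a source revlog ``src``, an empty destination revlog ``dst``
    # created with the desired format flags, and an open transaction ``tr``:
    #
    #   src.clone(tr, dst, deltareuse=revlog.DELTAREUSESAMEREVS)
    #
    # DELTAREUSEALWAYS would be faster but may carry over suboptimal deltas;
    # DELTAREUSENEVER recomputes every delta and is the slowest choice.
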
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

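    # Illustrative sketch (not part of this module): verifyintegrity() is a
    # generator, so callers drain it and act on each finding. Assuming a
    # ``state`` dict carrying the keys read above, and hypothetical
    # report_error/report_warning helpers:
    #
    #   state = {'expectedversion': rl.version & 0xFFFF,
    #            'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       if problem.error:
    #           report_error(problem.error, problem.node)
    #       elif problem.warning:
    #           report_warning(problem.warning)
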
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
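
    # Illustrative sketch (not part of this module): callers opt in to each
    # piece of information, so only the requested keys are present:
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   info['revisionscount']   # number of revisions in this revlog
    #   info['trackedsize']      # sum of rawsize() over all revisions
    #   'storedsize' in info     # False: it was not requested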