rust: add comment about lack of wdirrev handling...
Yuya Nishihara
r41337:13c23396 stable
@@ -1,2641 +1,2643 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__  # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw) f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor
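
# A minimal registration sketch for the contract documented in
# addflagprocessor() above. REVIDX_EXAMPLE and the 'stamp:' encoding are
# hypothetical, shown for illustration only:
#
#   def examplereadprocessor(rl, rawtext):
#       # stored form -> user-visible text; the result differs from rawtext,
#       # so it cannot be used for hash checks (False)
#       return rawtext[len('stamp:'):], False
#
#   def examplewriteprocessor(rl, text):
#       # user text -> stored form; the returned rawtext is what gets hashed
#       return 'stamp:' + text, True
#
#   def examplerawprocessor(rl, rawtext):
#       # returns the same boolean as the write transform
#       return True
#
#   addflagprocessor(REVIDX_EXAMPLE, (examplereadprocessor,
#                                     examplewriteprocessor,
#                                     examplerawprocessor))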

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
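
# Packing sketch: the first field of an index entry stores
# (offset << 16) | flags, so the helpers above round-trip by construction
# (REVIDX_ISCENSORED is just a sample flag value):
#
#   q = offset_type(0x1234, REVIDX_ISCENSORED)
#   assert getoffset(q) == 0x1234
#   assert gettype(q) == REVIDX_ISCENSORED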

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
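
# Size sketch: with the layout above, a v0 index entry is fixed-width,
# 4 * 4 + 3 * 20 == 76 bytes, i.e. indexformatv0.size == 76.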

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
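
# Size sketch: a v1 ("ng") index entry is likewise fixed-width,
# 8 + 6 * 4 + 20 + 12 == 64 bytes, i.e. indexformatng.size == 64. Only 20 of
# the 32 nodeid bytes are used today; the 12-byte pad reserves the rest.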

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
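
    # Header sketch: for rev 0 the offset is 0, so the first 4 bytes of the
    # packed entry are free to carry the version/flags header, e.g. assuming
    # REVLOGV1 == 1 and FLAG_INLINE_DATA == 1 << 16:
    #
    #   versionformat_pack(REVLOGV1 | FLAG_INLINE_DATA) == '\x00\x01\x00\x01'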

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
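
    # A usage sketch for the context manager above; 'offset' and 'length'
    # are hypothetical, and real callers go through the chunk cache:
    #
    #   with self._datareadfp() as fp:
    #       fp.seek(offset)
    #       segment = fp.read(length)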

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
568 """iterate over all rev in this revlog (from start to stop)"""
568 """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
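
    # Walk sketch (illustrative revs): in a generaldelta revlog where rev 5
    # deltas against rev 3, and rev 3 is a full snapshot (its own base):
    #
    #   self._deltachain(5) == ([3, 5], False)
    #   self._deltachain(5, stoprev=3) == ([5], True)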

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustext is not None:
            lazyancestors = rustext.ancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustext is not None:
+           # TODO: WdirUnsupported should be raised instead of GraphError
+           # if common includes wdirrev
            return rustext.ancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
918 if common is None:
920 if common is None:
919 common = [nullrev]
921 common = [nullrev]
920 if heads is None:
922 if heads is None:
921 heads = self.headrevs()
923 heads = self.headrevs()
922
924
923 inc = self.incrementalmissingrevs(common=common)
925 inc = self.incrementalmissingrevs(common=common)
924 return inc.missingancestors(heads)
926 return inc.missingancestors(heads)
925
927
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

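    # Sketch of the node/rev split (hypothetical `rlog`): findmissing() is
    # the node-ID counterpart of findmissingrevs(), so the two calls below
    # should agree once the results are mapped through node()/rev().
    #
    #     missingrevs = rlog.findmissingrevs(common=[0],
    #                                        heads=rlog.headrevs())
    #     missingnodes = rlog.findmissing(common=[rlog.node(0)],
    #                                     heads=rlog.heads())
    #     assert missingnodes == [rlog.node(r) for r in missingrevs]
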
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

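    # Illustrative call (hypothetical `rlog` and nodes `a`, `b`): selecting
    # everything between one root and one head, similar in spirit to the
    # revset "a::b".
    #
    #     nodes, outroots, outheads = rlog.nodesbetween([a], [b])
    #     # `nodes` is topologically sorted and includes `a` and `b`
    #     # themselves when `b` actually descends from `a`.
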
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        return dagop.headrevs(revs, self.parentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no rev is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

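    # The pure-Python fallback above is a single linear pass: every rev
    # starts out marked as a candidate head, and each rev then unmarks its
    # parents. A standalone sketch of the same idea over a toy DAG
    # (hypothetical; parentrevs given as a plain list):
    #
    #     parents = [(-1, -1), (0, -1), (0, -1)]  # two children of rev 0
    #     ishead = [1] * len(parents)
    #     for r, (p1, p2) in enumerate(parents):
    #         for p in (p1, p2):
    #             if p != -1:
    #                 ishead[p] = 0
    #     # -> heads are revs 1 and 2
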
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

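    # The `a > b` early return relies on a revlog invariant: parents are
    # always appended before their children, so an ancestor's revision number
    # is never larger than its descendant's. A sketch (hypothetical `rlog`):
    #
    #     assert rlog.isancestorrev(0, 0)      # self-ancestry
    #     assert not rlog.isancestorrev(5, 3)  # higher rev, never an ancestor
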
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

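    # All of the following are accepted forms for the same revision
    # (hypothetical `rlog`; '1e4e' stands in for a hex prefix of its node):
    #
    #     rlog.lookup(5)        # integer revision number
    #     rlog.lookup('5')      # str(revision number)
    #     rlog.lookup(node)     # 20-byte binary node
    #     rlog.lookup('1e4e')   # unambiguous hex prefix
    #
    # Ambiguous prefixes raise AmbiguousPrefixLookupError; unknown
    # identifiers raise LookupError.
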
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

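    # Note that cmp() never reads the stored revision back: it recomputes the
    # node hash from `text` and the stored parents, then compares node IDs.
    # Sketch (hypothetical `rlog`, `node` and `newtext`):
    #
    #     if rlog.cmp(node, newtext):
    #         pass  # newtext differs from what is stored
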
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

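    # Worked example of the window arithmetic above, assuming a 64KiB chunk
    # cache size (values are illustrative):
    #
    #     cachesize = 65536
    #     offset, length = 70000, 100
    #     realoffset = offset & ~(cachesize - 1)              # 65536
    #     reallength = (((offset + length + cachesize)
    #                    & ~(cachesize - 1)) - realoffset)    # 65536
    #     # i.e. one aligned 64KiB window [65536, 131072) is read and
    #     # cached, and the caller gets back bytes [70000, 70100).
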
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

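    # The inline adjustment above accounts for index entries interleaved
    # with data: in an inline revlog, rev r's data is preceded by r + 1
    # index entries (entries 0 through r). A sketch of the physical offset
    # computation, with illustrative values:
    #
    #     iosize = 64                # size of one index entry (assumed)
    #     logical = 1000             # self.start(rev) for rev == 3
    #     physical = logical + (3 + 1) * iosize    # 1256
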
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

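    # Following deltaparent() repeatedly walks a delta chain back to its
    # base snapshot; nullrev terminates the walk. A simplified sketch of
    # what _deltachain() computes (hypothetical `rlog` and `rev`):
    #
    #     chain = []
    #     r = rev
    #     while r != nullrev:
    #         chain.append(r)
    #         r = rlog.deltaparent(r)  # nullrev once we hit a full snapshot
    #     chain.reverse()              # base text first, requested rev last
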
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

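    # End-to-end sketch of a read (hypothetical `rlog`): the delta chain is
    # fetched with one or a few contiguous reads, the base text is patched
    # forward, and the resulting rawtext is cached for the next access.
    #
    #     text = rlog.revision(5)              # by revision number
    #     same = rlog.revision(rlog.node(5))   # by node, served from cache
    #     assert text == same
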
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

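    # hashrevisionsha1() computes the well-known SHA-1 node definition; the
    # parents are sorted first, so the hash does not depend on parent order.
    # A standalone sketch of the same computation:
    #
    #     import hashlib
    #     def nodeid(text, p1, p2):
    #         s = hashlib.sha1(min(p1, p2))
    #         s.update(max(p1, p2))
    #         s.update(text)
    #         return s.digest()
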
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

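    # A flag processor is a (readtransform, writetransform, rawtransform)
    # triple. Sketch of a hypothetical processor for an extension-defined
    # flag (REVIDX_MYFLAG, encode() and decode() are invented names):
    #
    #     def _readtransform(rl, text):
    #         return decode(text), True    # (new text, validatehash)
    #     def _writetransform(rl, text):
    #         return encode(text), True
    #     def _rawtransform(rl, text):
    #         return False                 # rawtext hash is not comparable
    #
    #     revlog.addflagprocessor(REVIDX_MYFLAG,
    #                             (_readtransform, _writetransform,
    #                              _rawtransform))
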
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

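    # Illustrative sketch (not part of the original file): with the default
    # hash() implementation, the nodeid verified by checkhash() is the SHA-1
    # of the two parent nodeids (sorted) followed by the text, roughly:
    #
    #   import hashlib
    #
    #   def _sketch_hash(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
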
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

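    # Illustrative note (not part of the original file): an inline revlog
    # interleaves index entries and revision data in the ".i" file; the
    # conversion above splits them into an index-only ".i" and a ".d" file:
    #
    #   inline: foo.i = [entry 0][data 0][entry 1][data 1]...
    #   split:  foo.i = [entry 0][entry 1]...
    #           foo.d = [data 0][data 1]...
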
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

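    # Illustrative usage sketch (hypothetical values, not part of the original
    # file): with ``rl`` a revlog and ``tr`` an open transaction, appending a
    # revision on top of the current tip looks roughly like:
    #
    #   p1 = rl.node(len(rl) - 1) if len(rl) else nullid
    #   node = rl.addrevision(b'file contents', tr, 0, p1, nullid)
    #   assert rl.rev(node) == len(rl) - 1
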
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

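    # Illustrative examples (not part of the original file) of the header
    # convention shared by compress() and decompress():
    #
    #   ('', b'x\x9c...')  zlib-compressed chunk; the header lives in the data
    #   ('u', b'hello')    uncompressed chunk stored behind a 'u' marker
    #   ('', b'\0...')     data that already starts with NUL is stored as-is
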
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

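    # Illustrative breakdown (not part of the original file) of the 8-tuple
    # appended to the index above:
    #
    #   e = (offset_type(offset, flags),  # data offset combined with flag bits
    #        deltainfo.deltalen,          # length of the stored (delta) chunk
    #        textlen,                     # length of the uncompressed rawtext
    #        deltainfo.base,              # base revision of the delta chain
    #        link,                        # linkrev into the changelog
    #        p1r, p2r,                    # parent revision numbers
    #        node)                        # 20-byte nodeid
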
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

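    # Illustrative sketch (hypothetical values, not part of the original
    # file): every item consumed by addgroup() is a 7-tuple
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where ``delta`` applies against ``deltabase`` (a node already stored or
    # an earlier member of the group) and ``linkmapper(linknode)`` yields the
    # linkrev to record.
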
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

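    # Illustrative example (hypothetical numbers, not part of the original
    # file): if revisions 0..4 carry linkrevs 0..4 with no merges, then
    # getstrippoint(3) returns (3, set()) and strip(3, tr) truncates the
    # index and data files so that only revisions 0..2 remain.
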
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When unset, the destination revlog's current setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

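    # Illustrative usage sketch (hypothetical names, not part of the original
    # file): recomputing every delta while copying into an empty destination
    # revlog ``destrl`` inside transaction ``tr``:
    #
    #   srcrl.clone(tr, destrl, deltareuse=srcrl.DELTAREUSENEVER)
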
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

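    # Illustrative note (not part of the original file): the tombstone written
    # by censorrevision() is a metadata-only text, roughly:
    #
    #   b'\x01\ncensored: <tombstone>\n\x01\n'
    #
    # which is what lets iscensoredtext() recognize the revision later.
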
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            # -------------------------------------------------------
            # flags()               | 0      | 0      | 0     | not 0
            # renamed()             | False  | True   | False | ?
            # rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

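    # Illustrative usage sketch (hypothetical state values, not part of the
    # original file):
    #
    #   state = {'expectedversion': 1, 'erroroncensored': True}
    #   problems = list(rl.verifyintegrity(state))
    #   assert not problems  # each problem carries .error/.warning and .node
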
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
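
    # Illustrative example (hypothetical numbers, not part of the original
    # file): storageinfo(revisionscount=True, trackedsize=True) returns a
    # dict shaped like:
    #
    #   {'revisionscount': 42, 'trackedsize': 123456}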