revlog: use separate variables to track version flags...
Gregory Szorc
r41241:3f807237 default
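In short: _loadindex() previously reused a single versionflags variable both for the header a brand-new revlog would be created with (derived from the opener options) and for the header read back from an existing index file, leaving the two cases easy to conflate. The change introduces newversionflags for the would-be-new header and only falls back to it when no index data exists (empty or missing index file). A minimal, self-contained sketch of the resulting control flow follows; the helper name load_version and the locally redefined constants are illustrative only, not part of Mercurial (see the diff below for the real code):

    import struct

    REVLOGV1 = 1                # format number lives in the low 16 bits
    FLAG_INLINE_DATA = 1 << 16  # feature flags live in the high 16 bits

    def load_version(indexdata, newversionflags):
        """Return the version+flags word governing this revlog: what is on
        disk if an index exists, else what a new revlog would be given."""
        if indexdata:
            # An existing index: its first 4 bytes are authoritative.
            return struct.unpack(">I", indexdata[:4])[0]
        # Empty or missing index: behave like a brand-new revlog.
        return newversionflags

    assert load_version(b"", REVLOGV1 | FLAG_INLINE_DATA) == 0x10001
    assert load_version(struct.pack(">I", REVLOGV1), 0x20001) == REVLOGV1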
@@ -1,2631 +1,2635 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__  # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

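# Editor's illustration (not part of revlog.py): a flag processor for a
# made-up REVIDX_UPPERCASE flag that stores text uppercased on disk. Per
# the docstring above, when "write" returns False the user-visible text is
# what the node hash covers, so "read" may return True (its output is
# hash-checkable) and "raw" returns the same boolean as "write".
def _uppercasereadprocessor(rl, rawtext):
    # disk rawtext -> user text; True: the result can be hash-checked
    return rawtext.lower(), True

def _uppercasewriteprocessor(rl, text):
    # user text -> disk rawtext; False: hash the input text, not rawtext
    return text.upper(), False

def _uppercaserawprocessor(rl, rawtext):
    # rawtext alone is not usable for hash verification
    return False

# Registration would then look like this, assuming REVIDX_UPPERCASE were
# declared in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER:
#   addflagprocessor(REVIDX_UPPERCASE, (_uppercasereadprocessor,
#                                       _uppercasewriteprocessor,
#                                       _uppercaserawprocessor))
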
def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

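# Editor's illustration (not part of revlog.py): the first field of an
# index entry multiplexes the data offset (high bits) with 16 bits of
# per-revision flags (low bits); getoffset()/gettype() undo the packing.
# REVIDX_ISCENSORED is used purely as a sample known flag.
_demo = offset_type(1234, REVIDX_ISCENSORED)
assert getoffset(_demo) == 1234
assert gettype(_demo) == REVIDX_ISCENSORED
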
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

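# Editor's note (not part of revlog.py): the layout above implies a fixed
# 76-byte record: four 4-byte longs plus three 20-byte nodeids.
assert indexformatv0.size == 4 * 4 + 3 * 20 == 76
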
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

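# Editor's note (not part of revlog.py): ">Qiiiiii20s12x" packs to a fixed
# 64-byte record: 8 + 6 * 4 + 20 + 12 bytes, the nodeid zero-padded to 32.
assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64
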
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

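# Editor's illustration (not part of revlog.py): for revision 0,
# packentry() splices the version word over the entry's first four bytes.
# Those bytes hold the high half of the offset field, which is always
# zero for the first revision, so no information is lost.
_entry0 = (offset_type(0, 0), 11, 21, 0, 0, -1, -1, b'\x00' * 20)
_packed0 = revlogio().packentry(_entry0, None, REVLOGV1 | FLAG_INLINE_DATA, 0)
assert versionformat_unpack(_packed0[:4])[0] == REVLOGV1 | FLAG_INLINE_DATA
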
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
-            versionflags = REVLOGV2 | FLAG_INLINE_DATA
+            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
-            versionflags = REVLOGV1 | FLAG_INLINE_DATA
+            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
-                versionflags |= FLAG_GENERALDELTA
+                newversionflags |= FLAG_GENERALDELTA
        else:
-            versionflags = REVLOG_DEFAULT_VERSION
+            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
+            else:
+                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

+            versionflags = newversionflags
+
        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

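    # Editor's note (not part of revlog.py): with the masks used in
    # _loadindex() above, a typical inline generaldelta v1 header decodes as
    #   header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA  # 0x30001
    #   header & 0xFFFF  == REVLOGV1                              # 0x00001
    #   header & ~0xFFFF == FLAG_INLINE_DATA | FLAG_GENERALDELTA  # 0x30000
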
510 @util.propertycache
514 @util.propertycache
511 def _compressor(self):
515 def _compressor(self):
512 return util.compengines[self._compengine].revlogcompressor()
516 return util.compengines[self._compengine].revlogcompressor()
513
517
514 def _indexfp(self, mode='r'):
518 def _indexfp(self, mode='r'):
515 """file object for the revlog's index file"""
519 """file object for the revlog's index file"""
516 args = {r'mode': mode}
520 args = {r'mode': mode}
517 if mode != 'r':
521 if mode != 'r':
518 args[r'checkambig'] = self._checkambig
522 args[r'checkambig'] = self._checkambig
519 if mode == 'w':
523 if mode == 'w':
520 args[r'atomictemp'] = True
524 args[r'atomictemp'] = True
521 return self.opener(self.indexfile, **args)
525 return self.opener(self.indexfile, **args)
522
526
523 def _datafp(self, mode='r'):
527 def _datafp(self, mode='r'):
524 """file object for the revlog's data file"""
528 """file object for the revlog's data file"""
525 return self.opener(self.datafile, mode=mode)
529 return self.opener(self.datafile, mode=mode)
526
530
527 @contextlib.contextmanager
531 @contextlib.contextmanager
528 def _datareadfp(self, existingfp=None):
532 def _datareadfp(self, existingfp=None):
529 """file object suitable to read data"""
533 """file object suitable to read data"""
530 # Use explicit file handle, if given.
534 # Use explicit file handle, if given.
531 if existingfp is not None:
535 if existingfp is not None:
532 yield existingfp
536 yield existingfp
533
537
534 # Use a file handle being actively used for writes, if available.
538 # Use a file handle being actively used for writes, if available.
535 # There is some danger to doing this because reads will seek the
539 # There is some danger to doing this because reads will seek the
536 # file. However, _writeentry() performs a SEEK_END before all writes,
540 # file. However, _writeentry() performs a SEEK_END before all writes,
537 # so we should be safe.
541 # so we should be safe.
538 elif self._writinghandles:
542 elif self._writinghandles:
539 if self._inline:
543 if self._inline:
540 yield self._writinghandles[0]
544 yield self._writinghandles[0]
541 else:
545 else:
542 yield self._writinghandles[1]
546 yield self._writinghandles[1]
543
547
544 # Otherwise open a new file handle.
548 # Otherwise open a new file handle.
545 else:
549 else:
546 if self._inline:
550 if self._inline:
547 func = self._indexfp
551 func = self._indexfp
548 else:
552 else:
549 func = self._datafp
553 func = self._datafp
550 with func() as fp:
554 with func() as fp:
551 yield fp
555 yield fp
552
556
553 def tip(self):
557 def tip(self):
554 return self.node(len(self.index) - 1)
558 return self.node(len(self.index) - 1)
555 def __contains__(self, rev):
559 def __contains__(self, rev):
556 return 0 <= rev < len(self)
560 return 0 <= rev < len(self)
557 def __len__(self):
561 def __len__(self):
558 return len(self.index)
562 return len(self.index)
559 def __iter__(self):
563 def __iter__(self):
560 return iter(pycompat.xrange(len(self)))
564 return iter(pycompat.xrange(len(self)))
561 def revs(self, start=0, stop=None):
565 def revs(self, start=0, stop=None):
562 """iterate over all rev in this revlog (from start to stop)"""
566 """iterate over all rev in this revlog (from start to stop)"""
563 return storageutil.iterrevs(len(self), start=start, stop=stop)
567 return storageutil.iterrevs(len(self), start=start, stop=stop)
564
568
565 @util.propertycache
569 @util.propertycache
566 def nodemap(self):
570 def nodemap(self):
567 if self.index:
571 if self.index:
568 # populate mapping down to the initial node
572 # populate mapping down to the initial node
569 node0 = self.index[0][7] # get around changelog filtering
573 node0 = self.index[0][7] # get around changelog filtering
570 self.rev(node0)
574 self.rev(node0)
571 return self._nodecache
575 return self._nodecache
572
576
573 def hasnode(self, node):
577 def hasnode(self, node):
574 try:
578 try:
575 self.rev(node)
579 self.rev(node)
576 return True
580 return True
577 except KeyError:
581 except KeyError:
578 return False
582 return False
579
583
580 def candelta(self, baserev, rev):
584 def candelta(self, baserev, rev):
581 """whether two revisions (baserev, rev) can be delta-ed or not"""
585 """whether two revisions (baserev, rev) can be delta-ed or not"""
582 # Disable delta if either rev requires a content-changing flag
586 # Disable delta if either rev requires a content-changing flag
583 # processor (ex. LFS). This is because such flag processor can alter
587 # processor (ex. LFS). This is because such flag processor can alter
584 # the rawtext content that the delta will be based on, and two clients
588 # the rawtext content that the delta will be based on, and two clients
585 # could have a same revlog node with different flags (i.e. different
589 # could have a same revlog node with different flags (i.e. different
586 # rawtext contents) and the delta could be incompatible.
590 # rawtext contents) and the delta could be incompatible.
587 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
591 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
588 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
592 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
589 return False
593 return False
590 return True
594 return True
591
595
592 def clearcaches(self):
596 def clearcaches(self):
593 self._revisioncache = None
597 self._revisioncache = None
594 self._chainbasecache.clear()
598 self._chainbasecache.clear()
595 self._chunkcache = (0, '')
599 self._chunkcache = (0, '')
596 self._pcache = {}
600 self._pcache = {}
597
601
598 try:
602 try:
599 self._nodecache.clearcaches()
603 self._nodecache.clearcaches()
600 except AttributeError:
604 except AttributeError:
601 self._nodecache = {nullid: nullrev}
605 self._nodecache = {nullid: nullrev}
602 self._nodepos = None
606 self._nodepos = None
603
607
604 def rev(self, node):
608 def rev(self, node):
605 try:
609 try:
606 return self._nodecache[node]
610 return self._nodecache[node]
607 except TypeError:
611 except TypeError:
608 raise
612 raise
609 except error.RevlogError:
613 except error.RevlogError:
610 # parsers.c radix tree lookup failed
614 # parsers.c radix tree lookup failed
611 if node == wdirid or node in wdirfilenodeids:
615 if node == wdirid or node in wdirfilenodeids:
612 raise error.WdirUnsupported
616 raise error.WdirUnsupported
613 raise error.LookupError(node, self.indexfile, _('no node'))
617 raise error.LookupError(node, self.indexfile, _('no node'))
614 except KeyError:
618 except KeyError:
615 # pure python cache lookup failed
619 # pure python cache lookup failed
616 n = self._nodecache
620 n = self._nodecache
617 i = self.index
621 i = self.index
618 p = self._nodepos
622 p = self._nodepos
619 if p is None:
623 if p is None:
620 p = len(i) - 1
624 p = len(i) - 1
621 else:
625 else:
622 assert p < len(i)
626 assert p < len(i)
623 for r in pycompat.xrange(p, -1, -1):
627 for r in pycompat.xrange(p, -1, -1):
624 v = i[r][7]
628 v = i[r][7]
625 n[v] = r
629 n[v] = r
626 if v == node:
630 if v == node:
627 self._nodepos = r - 1
631 self._nodepos = r - 1
628 return r
632 return r
629 if node == wdirid or node in wdirfilenodeids:
633 if node == wdirid or node in wdirfilenodeids:
630 raise error.WdirUnsupported
634 raise error.WdirUnsupported
631 raise error.LookupError(node, self.indexfile, _('no node'))
635 raise error.LookupError(node, self.indexfile, _('no node'))
632
636
633 # Accessors for index entries.
637 # Accessors for index entries.
634
638
635 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
639 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
636 # are flags.
640 # are flags.
637 def start(self, rev):
641 def start(self, rev):
638 return int(self.index[rev][0] >> 16)
642 return int(self.index[rev][0] >> 16)
639
643
640 def flags(self, rev):
644 def flags(self, rev):
641 return self.index[rev][0] & 0xFFFF
645 return self.index[rev][0] & 0xFFFF
642
646
643 def length(self, rev):
647 def length(self, rev):
644 return self.index[rev][1]
648 return self.index[rev][1]
645
649
646 def rawsize(self, rev):
650 def rawsize(self, rev):
647 """return the length of the uncompressed text for a given revision"""
651 """return the length of the uncompressed text for a given revision"""
648 l = self.index[rev][2]
652 l = self.index[rev][2]
649 if l >= 0:
653 if l >= 0:
650 return l
654 return l
651
655
652 t = self.revision(rev, raw=True)
656 t = self.revision(rev, raw=True)
653 return len(t)
657 return len(t)
654
658
655 def size(self, rev):
659 def size(self, rev):
656 """length of non-raw text (processed by a "read" flag processor)"""
660 """length of non-raw text (processed by a "read" flag processor)"""
657 # fast path: if no "read" flag processor could change the content,
661 # fast path: if no "read" flag processor could change the content,
658 # size is rawsize. note: ELLIPSIS is known to not change the content.
662 # size is rawsize. note: ELLIPSIS is known to not change the content.
659 flags = self.flags(rev)
663 flags = self.flags(rev)
660 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
664 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
661 return self.rawsize(rev)
665 return self.rawsize(rev)
662
666
663 return len(self.revision(rev, raw=False))
667 return len(self.revision(rev, raw=False))
664
668
665 def chainbase(self, rev):
669 def chainbase(self, rev):
666 base = self._chainbasecache.get(rev)
670 base = self._chainbasecache.get(rev)
667 if base is not None:
671 if base is not None:
668 return base
672 return base
669
673
670 index = self.index
674 index = self.index
671 iterrev = rev
675 iterrev = rev
672 base = index[iterrev][3]
676 base = index[iterrev][3]
673 while base != iterrev:
677 while base != iterrev:
674 iterrev = base
678 iterrev = base
675 base = index[iterrev][3]
679 base = index[iterrev][3]
676
680
677 self._chainbasecache[rev] = base
681 self._chainbasecache[rev] = base
678 return base
682 return base
679
683
680 def linkrev(self, rev):
684 def linkrev(self, rev):
681 return self.index[rev][4]
685 return self.index[rev][4]
682
686
683 def parentrevs(self, rev):
687 def parentrevs(self, rev):
684 try:
688 try:
685 entry = self.index[rev]
689 entry = self.index[rev]
686 except IndexError:
690 except IndexError:
687 if rev == wdirrev:
691 if rev == wdirrev:
688 raise error.WdirUnsupported
692 raise error.WdirUnsupported
689 raise
693 raise
690
694
691 return entry[5], entry[6]
695 return entry[5], entry[6]
692
696
693 # fast parentrevs(rev) where rev isn't filtered
697 # fast parentrevs(rev) where rev isn't filtered
694 _uncheckedparentrevs = parentrevs
698 _uncheckedparentrevs = parentrevs
695
699
696 def node(self, rev):
700 def node(self, rev):
697 try:
701 try:
698 return self.index[rev][7]
702 return self.index[rev][7]
699 except IndexError:
703 except IndexError:
700 if rev == wdirrev:
704 if rev == wdirrev:
701 raise error.WdirUnsupported
705 raise error.WdirUnsupported
702 raise
706 raise
703
707
704 # Derived from index values.
708 # Derived from index values.
705
709
706 def end(self, rev):
710 def end(self, rev):
707 return self.start(rev) + self.length(rev)
711 return self.start(rev) + self.length(rev)
708
712
709 def parents(self, node):
713 def parents(self, node):
710 i = self.index
714 i = self.index
711 d = i[self.rev(node)]
715 d = i[self.rev(node)]
712 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
716 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
713
717
714 def chainlen(self, rev):
718 def chainlen(self, rev):
715 return self._chaininfo(rev)[0]
719 return self._chaininfo(rev)[0]
716
720
717 def _chaininfo(self, rev):
721 def _chaininfo(self, rev):
718 chaininfocache = self._chaininfocache
722 chaininfocache = self._chaininfocache
719 if rev in chaininfocache:
723 if rev in chaininfocache:
720 return chaininfocache[rev]
724 return chaininfocache[rev]
721 index = self.index
725 index = self.index
722 generaldelta = self._generaldelta
726 generaldelta = self._generaldelta
723 iterrev = rev
727 iterrev = rev
724 e = index[iterrev]
728 e = index[iterrev]
725 clen = 0
729 clen = 0
726 compresseddeltalen = 0
730 compresseddeltalen = 0
727 while iterrev != e[3]:
731 while iterrev != e[3]:
728 clen += 1
732 clen += 1
729 compresseddeltalen += e[1]
733 compresseddeltalen += e[1]
730 if generaldelta:
734 if generaldelta:
731 iterrev = e[3]
735 iterrev = e[3]
732 else:
736 else:
733 iterrev -= 1
737 iterrev -= 1
734 if iterrev in chaininfocache:
738 if iterrev in chaininfocache:
735 t = chaininfocache[iterrev]
739 t = chaininfocache[iterrev]
736 clen += t[0]
740 clen += t[0]
737 compresseddeltalen += t[1]
741 compresseddeltalen += t[1]
738 break
742 break
739 e = index[iterrev]
743 e = index[iterrev]
740 else:
744 else:
741 # Add text length of base since decompressing that also takes
745 # Add text length of base since decompressing that also takes
742 # work. For cache hits the length is already included.
746 # work. For cache hits the length is already included.
743 compresseddeltalen += e[1]
747 compresseddeltalen += e[1]
744 r = (clen, compresseddeltalen)
748 r = (clen, compresseddeltalen)
745 chaininfocache[rev] = r
749 chaininfocache[rev] = r
746 return r
750 return r
747
751
748 def _deltachain(self, rev, stoprev=None):
752 def _deltachain(self, rev, stoprev=None):
749 """Obtain the delta chain for a revision.
753 """Obtain the delta chain for a revision.
750
754
751 ``stoprev`` specifies a revision to stop at. If not specified, we
755 ``stoprev`` specifies a revision to stop at. If not specified, we
752 stop at the base of the chain.
756 stop at the base of the chain.
753
757
754 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
758 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
755 revs in ascending order and ``stopped`` is a bool indicating whether
759 revs in ascending order and ``stopped`` is a bool indicating whether
756 ``stoprev`` was hit.
760 ``stoprev`` was hit.
757 """
761 """
758 # Try C implementation.
762 # Try C implementation.
759 try:
763 try:
760 return self.index.deltachain(rev, stoprev, self._generaldelta)
764 return self.index.deltachain(rev, stoprev, self._generaldelta)
761 except AttributeError:
765 except AttributeError:
762 pass
766 pass
763
767
764 chain = []
768 chain = []
765
769
766 # Alias to prevent attribute lookup in tight loop.
770 # Alias to prevent attribute lookup in tight loop.
767 index = self.index
771 index = self.index
768 generaldelta = self._generaldelta
772 generaldelta = self._generaldelta
769
773
770 iterrev = rev
774 iterrev = rev
771 e = index[iterrev]
775 e = index[iterrev]
772 while iterrev != e[3] and iterrev != stoprev:
776 while iterrev != e[3] and iterrev != stoprev:
773 chain.append(iterrev)
777 chain.append(iterrev)
774 if generaldelta:
778 if generaldelta:
775 iterrev = e[3]
779 iterrev = e[3]
776 else:
780 else:
777 iterrev -= 1
781 iterrev -= 1
778 e = index[iterrev]
782 e = index[iterrev]
779
783
780 if iterrev == stoprev:
784 if iterrev == stoprev:
781 stopped = True
785 stopped = True
782 else:
786 else:
783 chain.append(iterrev)
787 chain.append(iterrev)
784 stopped = False
788 stopped = False
785
789
786 chain.reverse()
790 chain.reverse()
787 return chain, stopped
791 return chain, stopped
788
792
789 def ancestors(self, revs, stoprev=0, inclusive=False):
793 def ancestors(self, revs, stoprev=0, inclusive=False):
790 """Generate the ancestors of 'revs' in reverse revision order.
794 """Generate the ancestors of 'revs' in reverse revision order.
791 Does not generate revs lower than stoprev.
795 Does not generate revs lower than stoprev.
792
796
793 See the documentation for ancestor.lazyancestors for more details."""
797 See the documentation for ancestor.lazyancestors for more details."""
794
798
795 # first, make sure start revisions aren't filtered
799 # first, make sure start revisions aren't filtered
796 revs = list(revs)
800 revs = list(revs)
797 checkrev = self.node
801 checkrev = self.node
798 for r in revs:
802 for r in revs:
799 checkrev(r)
803 checkrev(r)
800 # and we're sure ancestors aren't filtered as well
804 # and we're sure ancestors aren't filtered as well
801
805
802 if rustext is not None:
806 if rustext is not None:
803 lazyancestors = rustext.ancestor.LazyAncestors
807 lazyancestors = rustext.ancestor.LazyAncestors
804 arg = self.index
808 arg = self.index
805 elif util.safehasattr(parsers, 'rustlazyancestors'):
809 elif util.safehasattr(parsers, 'rustlazyancestors'):
806 lazyancestors = ancestor.rustlazyancestors
810 lazyancestors = ancestor.rustlazyancestors
807 arg = self.index
811 arg = self.index
808 else:
812 else:
809 lazyancestors = ancestor.lazyancestors
813 lazyancestors = ancestor.lazyancestors
810 arg = self._uncheckedparentrevs
814 arg = self._uncheckedparentrevs
811 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
815 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
812
816
813 def descendants(self, revs):
817 def descendants(self, revs):
814 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
818 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
815
819
816 def findcommonmissing(self, common=None, heads=None):
820 def findcommonmissing(self, common=None, heads=None):
817 """Return a tuple of the ancestors of common and the ancestors of heads
821 """Return a tuple of the ancestors of common and the ancestors of heads
818 that are not ancestors of common. In revset terminology, we return the
822 that are not ancestors of common. In revset terminology, we return the
819 tuple:
823 tuple:
820
824
821 ::common, (::heads) - (::common)
825 ::common, (::heads) - (::common)
822
826
823 The list is sorted by revision number, meaning it is
827 The list is sorted by revision number, meaning it is
824 topologically sorted.
828 topologically sorted.
825
829
826 'heads' and 'common' are both lists of node IDs. If heads is
830 'heads' and 'common' are both lists of node IDs. If heads is
827 not supplied, uses all of the revlog's heads. If common is not
831 not supplied, uses all of the revlog's heads. If common is not
828 supplied, uses nullid."""
832 supplied, uses nullid."""
829 if common is None:
833 if common is None:
830 common = [nullid]
834 common = [nullid]
831 if heads is None:
835 if heads is None:
832 heads = self.heads()
836 heads = self.heads()
833
837
834 common = [self.rev(n) for n in common]
838 common = [self.rev(n) for n in common]
835 heads = [self.rev(n) for n in heads]
839 heads = [self.rev(n) for n in heads]
836
840
837 # we want the ancestors, but inclusive
841 # we want the ancestors, but inclusive
838 class lazyset(object):
842 class lazyset(object):
839 def __init__(self, lazyvalues):
843 def __init__(self, lazyvalues):
840 self.addedvalues = set()
844 self.addedvalues = set()
841 self.lazyvalues = lazyvalues
845 self.lazyvalues = lazyvalues
842
846
843 def __contains__(self, value):
847 def __contains__(self, value):
844 return value in self.addedvalues or value in self.lazyvalues
848 return value in self.addedvalues or value in self.lazyvalues
845
849
846 def __iter__(self):
850 def __iter__(self):
847 added = self.addedvalues
851 added = self.addedvalues
848 for r in added:
852 for r in added:
849 yield r
853 yield r
850 for r in self.lazyvalues:
854 for r in self.lazyvalues:
851 if not r in added:
855 if not r in added:
852 yield r
856 yield r
853
857
854 def add(self, value):
858 def add(self, value):
855 self.addedvalues.add(value)
859 self.addedvalues.add(value)
856
860
857 def update(self, values):
861 def update(self, values):
858 self.addedvalues.update(values)
862 self.addedvalues.update(values)
859
863
860 has = lazyset(self.ancestors(common))
864 has = lazyset(self.ancestors(common))
861 has.add(nullrev)
865 has.add(nullrev)
862 has.update(common)
866 has.update(common)
863
867
864 # take all ancestors from heads that aren't in has
868 # take all ancestors from heads that aren't in has
865 missing = set()
869 missing = set()
866 visit = collections.deque(r for r in heads if r not in has)
870 visit = collections.deque(r for r in heads if r not in has)
867 while visit:
871 while visit:
868 r = visit.popleft()
872 r = visit.popleft()
869 if r in missing:
873 if r in missing:
870 continue
874 continue
871 else:
875 else:
872 missing.add(r)
876 missing.add(r)
873 for p in self.parentrevs(r):
877 for p in self.parentrevs(r):
874 if p not in has:
878 if p not in has:
875 visit.append(p)
879 visit.append(p)
876 missing = list(missing)
880 missing = list(missing)
877 missing.sort()
881 missing.sort()
878 return has, [self.node(miss) for miss in missing]
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

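    # Illustrative sketch (the names here are examples, not part of this
    # API): given a revlog `rl` and the head nodes a peer already has,
    # findmissing() yields what the peer lacks, in topological order:
    #
    #   missing = rl.findmissing(common=peerheads)
    #   missing = rl.findmissing(common=peerheads, heads=rl.heads())  # same
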
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

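    # Sketch of the contract above, with illustrative names: for nodes `a`
    # and `b` where `a` is an ancestor of `b`:
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[b])
    #   # nodes is topologically sorted and contains both endpoints;
    #   # outroots == [a] and outheads == [b] since both are reachable
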
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

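    # The marking pass above can be followed on a toy parent table (a
    # standalone sketch, independent of any revlog instance):
    #
    #   parentrevs = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
    #   ishead = dict.fromkeys(parentrevs, 1)
    #   for r, (p1, p2) in parentrevs.items():
    #       ishead[p1] = ishead[p2] = 0     # a revision with a child loses
    #   # [r for r in parentrevs if ishead.get(r)] == [1, 2]
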
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

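    # Note on cost (sketch): there is no reverse index to consult, so
    # children() scans every revision after `node`; on a revlog `rl`:
    #
    #   kids = rl.children(rl.node(0))   # O(len(rl)) walk over parentrevs
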
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

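    # The early returns above are plain revision-number arithmetic: nullrev
    # is an ancestor of everything, every revision is its own ancestor, and
    # an ancestor always has a smaller revision number than its descendants:
    #
    #   rl.isancestorrev(nullrev, r)   # True for any r
    #   rl.isancestorrev(r, r)         # True
    #   rl.isancestorrev(r2, r1)       # False whenever r2 > r1
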
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

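    # lookup() accepts several identifier shapes; a sketch with illustrative
    # values (each call raises LookupError when nothing matches):
    #
    #   rl.lookup(5)           # revision number
    #   rl.lookup('5')         # str(revision number); '-1' counts from tip
    #   rl.lookup(node20)      # 20-byte binary node, validated and returned
    #   rl.lookup('1e4f2a')    # unambiguous hex prefix, via _partialmatch
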
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

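    # Usage sketch: trim a node known to `rl` down to its minimal
    # unambiguous hex prefix (never shorter than minlength):
    #
    #   prefix = rl.shortest(node, minlength=4)
    #   assert rl.lookup(prefix) == node
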
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

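    # Worked example of the window arithmetic above (the chunk cache size is
    # required elsewhere to be a power of two, which is what makes the bit
    # masking valid). With cachesize = 65536, offset = 70000, length = 1000:
    #
    #   realoffset = 70000 & ~65535                              # 65536
    #   reallength = ((70000 + 1000 + 65536) & ~65535) - 65536   # 65536
    #
    # so a single aligned 64k read covers the requested bytes 70000..70999.
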
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

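    # Sketch of the two layouts the branches above encode: with generaldelta
    # the index base field names an arbitrary delta parent; without it,
    # deltas implicitly chain against the previous revision:
    #
    #   rl.deltaparent(rev)   # base from the index entry (generaldelta)
    #   rl.deltaparent(rev)   # rev - 1 (classic layout)
    #   rl.deltaparent(rev)   # nullrev when rev is stored as a full text
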
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

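    # Usage sketch, assuming `rl` is a revlog holding `node`:
    #
    #   text = rl.revision(node)            # flag-processed, hash-checked
    #   raw = rl.revision(node, raw=True)   # reassembled rawtext, before
    #                                       # any flag transforms
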
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable because the transforms are
        not commutative.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

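    # Sketch of the processor shape consumed above: _flagprocessors maps a
    # flag bit to a (readtransform, writetransform, rawtransform) triple.
    # The helper names here are hypothetical, for illustration only:
    #
    #   def readexternal(rl, text):
    #       return fetchstored(text), False    # (newtext, validatehash)
    #
    #   self._flagprocessors[REVIDX_EXTSTORED] = (
    #       readexternal, writeexternal, rawexternal)
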
1744 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1748 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1745 """Check node hash integrity.
1749 """Check node hash integrity.
1746
1750
1747 Available as a function so that subclasses can extend hash mismatch
1751 Available as a function so that subclasses can extend hash mismatch
1748 behaviors as needed.
1752 behaviors as needed.
1749 """
1753 """
1750 try:
1754 try:
1751 if p1 is None and p2 is None:
1755 if p1 is None and p2 is None:
1752 p1, p2 = self.parents(node)
1756 p1, p2 = self.parents(node)
1753 if node != self.hash(text, p1, p2):
1757 if node != self.hash(text, p1, p2):
1754 # Clear the revision cache on hash failure. The revision cache
1758 # Clear the revision cache on hash failure. The revision cache
1755 # only stores the raw revision and clearing the cache does have
1759 # only stores the raw revision and clearing the cache does have
1756 # the side-effect that we won't have a cache hit when the raw
1760 # the side-effect that we won't have a cache hit when the raw
1757 # revision data is accessed. But this case should be rare and
1761 # revision data is accessed. But this case should be rare and
1758 # it is extra work to teach the cache about the hash
1762 # it is extra work to teach the cache about the hash
1759 # verification state.
1763 # verification state.
1760 if self._revisioncache and self._revisioncache[0] == node:
1764 if self._revisioncache and self._revisioncache[0] == node:
1761 self._revisioncache = None
1765 self._revisioncache = None
1762
1766
1763 revornode = rev
1767 revornode = rev
1764 if revornode is None:
1768 if revornode is None:
1765 revornode = templatefilters.short(hex(node))
1769 revornode = templatefilters.short(hex(node))
1766 raise error.RevlogError(_("integrity check failed on %s:%s")
1770 raise error.RevlogError(_("integrity check failed on %s:%s")
1767 % (self.indexfile, pycompat.bytestr(revornode)))
1771 % (self.indexfile, pycompat.bytestr(revornode)))
1768 except error.RevlogError:
1772 except error.RevlogError:
1769 if self._censorable and storageutil.iscensoredtext(text):
1773 if self._censorable and storageutil.iscensoredtext(text):
1770 raise error.CensoredNodeError(self.indexfile, node, text)
1774 raise error.CensoredNodeError(self.indexfile, node, text)
1771 raise
1775 raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
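
    # For orientation, a sketch (not from the original source) of the on-disk
    # change performed above:
    #
    #   inline: .i = [entry0][chunk0][entry1][chunk1]...
    #   split:  .i = [entry0][entry1]...   .d = [chunk0][chunk1]...
    #
    # i.e. the rewrite copies every revision chunk into a new .d file and
    # repacks the index without the FLAG_INLINE_DATA bit.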

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data
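
    # The (header, data) contract above, sketched with assumed values; the
    # exact bytes depend on the configured compressor:
    #
    #   >>> rl.compress(b'')           # ('', b'')         empty passthrough
    #   >>> rl.compress(b'x' * 4096)   # ('', b'x\x9c...') compressor header
    #   >>> rl.compress(b'ab')         # ('u', b'ab')      stored literally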

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines.
        #
        # It follows that we want to optimize for the "decompress compressed
        # data encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)
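
    # Header-byte routing used above, summarized for reference:
    #
    #   'x'  -> zlib-compressed chunk ('x' is the zlib header byte 0x78)
    #   '\0' -> chunk stored verbatim, returned as-is
    #   'u'  -> uncompressed chunk with a one-byte prefix to strip
    #   else -> resolved through util.compengines.forrevlogheader(), which
    #           covers less common engines such as zstd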

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size before it is changed by flag processors, which
            # is the non-raw size. use revlog explicitly to avoid filelog's
            # extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be a full replacement in
                    # a single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the destination revlog's existing setting is
        used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
2440 def censorrevision(self, tr, censornode, tombstone=b''):
2444 def censorrevision(self, tr, censornode, tombstone=b''):
2441 if (self.version & 0xFFFF) == REVLOGV0:
2445 if (self.version & 0xFFFF) == REVLOGV0:
2442 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2446 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2443 self.version)
2447 self.version)
2444
2448
2445 censorrev = self.rev(censornode)
2449 censorrev = self.rev(censornode)
2446 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2450 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2447
2451
2448 if len(tombstone) > self.rawsize(censorrev):
2452 if len(tombstone) > self.rawsize(censorrev):
2449 raise error.Abort(_('censor tombstone must be no longer than '
2453 raise error.Abort(_('censor tombstone must be no longer than '
2450 'censored data'))
2454 'censored data'))
2451
2455
2452 # Rewriting the revlog in place is hard. Our strategy for censoring is
2456 # Rewriting the revlog in place is hard. Our strategy for censoring is
2453 # to create a new revlog, copy all revisions to it, then replace the
2457 # to create a new revlog, copy all revisions to it, then replace the
2454 # revlogs on transaction close.
2458 # revlogs on transaction close.
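        # The packed tombstone uses the filelog metadata envelope; a sketch of
        # the bytes produced above (assuming tombstone=b'reason'):
        #
        #   b'\x01\ncensored: reason\n\x01\n'
        #
        # i.e. key/value lines framed by \x01\n markers, with the (empty)
        # replacement text following the closing marker.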

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d