rust-cpython: using the new bindings from Python...
Georges Racinet
r41150:536c8353 default
@@ -1,2607 +1,2617 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
+try:
+    from . import rustext
+    rustext.__name__ # force actual import (see hgdemandimport)
+except ImportError:
+    rustext = None

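# (Editor's note, illustrative only -- not part of the changeset.) Under
# Mercurial's hgdemandimport, 'from . import rustext' would normally bind a
# lazy proxy and defer the real import; touching an attribute such as
# __name__ forces the module to load immediately, so a missing or broken
# rustext extension module becomes 'rustext = None' here instead of an
# ImportError at first use. Callers (see ancestors() below) then simply
# test 'rustext is not None'.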
# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor
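
# (Editor's note, illustrative only -- the 'dummy*' processors are invented
# names, not part of this file.) An extension following the (read, write,
# raw) contract documented in addflagprocessor() above could register on a
# known flag like this:
#
#     def dummyread(rl, rawtext):
#         return rawtext, True       # text usable for hash checks
#     def dummywrite(rl, text):
#         return text, True          # rawtext usable for hash checks
#     def dummyraw(rl, rawtext):
#         return True
#     addflagprocessor(REVIDX_EXTSTORED, (dummyread, dummywrite, dummyraw))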

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
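
# (Editor's note, illustrative only.) A quick round-trip through the helpers
# above: the first index field keeps a 48-bit byte offset in its high bits
# and 16 flag bits in its low bits.
#
#     q = offset_type(1024, 0)
#     assert q == 1024 << 16
#     assert getoffset(q) == 1024
#     assert gettype(q) == 0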

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
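
# (Editor's note, illustrative only.) The v0 record size follows directly
# from the layout above: four 4-byte big-endian integers plus three 20-byte
# nodeids.
#
#     assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per entry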

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
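
# (Editor's note, illustrative only.) The "ng" record is 64 bytes: one
# 8-byte offset/flags word (built with offset_type() above), six 4-byte
# integers, a 20-byte nodeid, and 12 pad bytes reserving room for 32-byte
# hashes.
#
#     assert indexformatng.size == 8 + 6 * 4 + 20 + 12  # 64 bytes per entry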

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

            # revlog v0 doesn't have flag processors
            for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
                _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        self._loadindex(v, mmapindexthreshold)

    def _loadindex(self, v, mmapindexthreshold):
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
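
    # (Editor's note, illustrative worked example -- the revs are invented.)
    # With generaldelta off and rev 2 a full snapshot (its base is itself),
    # a delta chain 2 <- 3 <- 4 <- 5 gives:
    #
    #     self._deltachain(5)             # -> ([2, 3, 4, 5], False)
    #     self._deltachain(5, stoprev=3)  # -> ([4, 5], True)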

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well
-        if util.safehasattr(parsers, 'rustlazyancestors'):
-            return ancestor.rustlazyancestors(
-                self.index, revs,
-                stoprev=stoprev, inclusive=inclusive)
-        return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
-                                      stoprev=stoprev, inclusive=inclusive)
+
+        if rustext is not None:
+            lazyancestors = rustext.ancestor.LazyAncestors
+            arg = self.index
+        elif util.safehasattr(parsers, 'rustlazyancestors'):
+            lazyancestors = ancestor.rustlazyancestors
+            arg = self.index
+        else:
+            lazyancestors = ancestor.lazyancestors
+            arg = self._uncheckedparentrevs
+        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
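
    # (Editor's note, illustrative only -- not part of the changeset.) All
    # three branches above share one call shape, which is what lets the
    # class from the new rust-cpython bindings drop in transparently. When
    # the rustext module is built, the selection is equivalent to:
    #
    #     aiter = rustext.ancestor.LazyAncestors(
    #         self.index, revs, stoprev=stoprev, inclusive=inclusive)
    #
    # and the returned object is then iterated the same way as the
    # pure-Python ancestor.lazyancestors result.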
788
798
789 def descendants(self, revs):
799 def descendants(self, revs):
790 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
800 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
791
801
792 def findcommonmissing(self, common=None, heads=None):
802 def findcommonmissing(self, common=None, heads=None):
793 """Return a tuple of the ancestors of common and the ancestors of heads
803 """Return a tuple of the ancestors of common and the ancestors of heads
794 that are not ancestors of common. In revset terminology, we return the
804 that are not ancestors of common. In revset terminology, we return the
795 tuple:
805 tuple:
796
806
797 ::common, (::heads) - (::common)
807 ::common, (::heads) - (::common)
798
808
799 The list is sorted by revision number, meaning it is
809 The list is sorted by revision number, meaning it is
800 topologically sorted.
810 topologically sorted.
801
811
802 'heads' and 'common' are both lists of node IDs. If heads is
812 'heads' and 'common' are both lists of node IDs. If heads is
803 not supplied, uses all of the revlog's heads. If common is not
813 not supplied, uses all of the revlog's heads. If common is not
804 supplied, uses nullid."""
814 supplied, uses nullid."""
805 if common is None:
815 if common is None:
806 common = [nullid]
816 common = [nullid]
807 if heads is None:
817 if heads is None:
808 heads = self.heads()
818 heads = self.heads()
809
819
810 common = [self.rev(n) for n in common]
820 common = [self.rev(n) for n in common]
811 heads = [self.rev(n) for n in heads]
821 heads = [self.rev(n) for n in heads]
812
822
813 # we want the ancestors, but inclusive
823 # we want the ancestors, but inclusive
814 class lazyset(object):
824 class lazyset(object):
815 def __init__(self, lazyvalues):
825 def __init__(self, lazyvalues):
816 self.addedvalues = set()
826 self.addedvalues = set()
817 self.lazyvalues = lazyvalues
827 self.lazyvalues = lazyvalues
818
828
819 def __contains__(self, value):
829 def __contains__(self, value):
820 return value in self.addedvalues or value in self.lazyvalues
830 return value in self.addedvalues or value in self.lazyvalues
821
831
822 def __iter__(self):
832 def __iter__(self):
823 added = self.addedvalues
833 added = self.addedvalues
824 for r in added:
834 for r in added:
825 yield r
835 yield r
826 for r in self.lazyvalues:
836 for r in self.lazyvalues:
827 if not r in added:
837 if not r in added:
828 yield r
838 yield r
829
839
830 def add(self, value):
840 def add(self, value):
831 self.addedvalues.add(value)
841 self.addedvalues.add(value)
832
842
833 def update(self, values):
843 def update(self, values):
834 self.addedvalues.update(values)
844 self.addedvalues.update(values)
835
845
836 has = lazyset(self.ancestors(common))
846 has = lazyset(self.ancestors(common))
837 has.add(nullrev)
847 has.add(nullrev)
838 has.update(common)
848 has.update(common)
839
849
840 # take all ancestors from heads that aren't in has
850 # take all ancestors from heads that aren't in has
841 missing = set()
851 missing = set()
842 visit = collections.deque(r for r in heads if r not in has)
852 visit = collections.deque(r for r in heads if r not in has)
843 while visit:
853 while visit:
844 r = visit.popleft()
854 r = visit.popleft()
845 if r in missing:
855 if r in missing:
846 continue
856 continue
847 else:
857 else:
848 missing.add(r)
858 missing.add(r)
849 for p in self.parentrevs(r):
859 for p in self.parentrevs(r):
850 if p not in has:
860 if p not in has:
851 visit.append(p)
861 visit.append(p)
852 missing = list(missing)
862 missing = list(missing)
853 missing.sort()
863 missing.sort()
854 return has, [self.node(miss) for miss in missing]
864 return has, [self.node(miss) for miss in missing]
855
865
856 def incrementalmissingrevs(self, common=None):
866 def incrementalmissingrevs(self, common=None):
857 """Return an object that can be used to incrementally compute the
867 """Return an object that can be used to incrementally compute the
858 revision numbers of the ancestors of arbitrary sets that are not
868 revision numbers of the ancestors of arbitrary sets that are not
859 ancestors of common. This is an ancestor.incrementalmissingancestors
869 ancestors of common. This is an ancestor.incrementalmissingancestors
860 object.
870 object.
861
871
862 'common' is a list of revision numbers. If common is not supplied, uses
872 'common' is a list of revision numbers. If common is not supplied, uses
863 nullrev.
873 nullrev.
864 """
874 """
865 if common is None:
875 if common is None:
866 common = [nullrev]
876 common = [nullrev]
867
877
868 return ancestor.incrementalmissingancestors(self.parentrevs, common)
878 return ancestor.incrementalmissingancestors(self.parentrevs, common)
869
879
870 def findmissingrevs(self, common=None, heads=None):
880 def findmissingrevs(self, common=None, heads=None):
871 """Return the revision numbers of the ancestors of heads that
881 """Return the revision numbers of the ancestors of heads that
872 are not ancestors of common.
882 are not ancestors of common.
873
883
874 More specifically, return a list of revision numbers corresponding to
884 More specifically, return a list of revision numbers corresponding to
875 nodes N such that every N satisfies the following constraints:
885 nodes N such that every N satisfies the following constraints:
876
886
877 1. N is an ancestor of some node in 'heads'
887 1. N is an ancestor of some node in 'heads'
878 2. N is not an ancestor of any node in 'common'
888 2. N is not an ancestor of any node in 'common'
879
889
880 The list is sorted by revision number, meaning it is
890 The list is sorted by revision number, meaning it is
881 topologically sorted.
891 topologically sorted.
882
892
883 'heads' and 'common' are both lists of revision numbers. If heads is
893 'heads' and 'common' are both lists of revision numbers. If heads is
884 not supplied, uses all of the revlog's heads. If common is not
894 not supplied, uses all of the revlog's heads. If common is not
885 supplied, uses nullid."""
895 supplied, uses nullid."""
886 if common is None:
896 if common is None:
887 common = [nullrev]
897 common = [nullrev]
888 if heads is None:
898 if heads is None:
889 heads = self.headrevs()
899 heads = self.headrevs()
890
900
891 inc = self.incrementalmissingrevs(common=common)
901 inc = self.incrementalmissingrevs(common=common)
892 return inc.missingancestors(heads)
902 return inc.missingancestors(heads)
893
903
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

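    # Illustrative sketch (not from the original source): 'findmissing' is
    # typically used during discovery to compute what a peer lacks, e.g.,
    # assuming 'cl' is a changelog (a revlog subclass) and the given nodes
    # exist:
    #
    #   missing = cl.findmissing(common=[commonnode], heads=[remotehead])
    #   # 'missing' lists node IDs to bundle, in topological order.
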
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots? Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

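    # Illustrative sketch (not from the original source): the returned tuple
    # unpacks as below, assuming 'rl' is a revlog and 'a'/'b' are node IDs
    # with 'a' an ancestor of 'b'. Since parents always have lower revision
    # numbers than children, the topological ordering puts 'a' first and 'b'
    # last:
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[b])
    #   assert nodes[0] == a and nodes[-1] == b
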
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

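    # Illustrative sketch (not from the original source), assuming 'rl' is a
    # revlog and 'n' an existing node ID:
    #
    #   rl.heads()          # all childless nodes in the revlog
    #   rl.heads(start=n)   # only heads that are descendants of n
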
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

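    # Illustrative sketch (not from the original source): because a parent
    # always has a lower revision number than its children, 'children' only
    # needs to scan revisions after rev(node):
    #
    #   kids = rl.children(n)   # nodes whose parents include n
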
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

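    # Illustrative sketch (not from the original source): the revision-number
    # ordering gives the cheap negative case, assuming revs 'a' and 'b':
    #
    #   rl.isancestorrev(a, a)   # True: a rev is its own ancestor
    #   rl.isancestorrev(b, a)   # False whenever b > a, no DAG walk needed
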
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

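    # Illustrative sketch (not from the original source): when several
    # candidate ancestors tie, taking 'min' over the binary node IDs picks a
    # stable winner, so repeated merges resolve identically:
    #
    #   base = rl.ancestor(n1, n2)   # nullid if n1 and n2 are unrelated
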
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

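    # Illustrative sketch (not from the original source): all of these forms
    # resolve to the same node, assuming rev 5 exists and has the given
    # (hypothetical) hex prefix:
    #
    #   rl.lookup(5)            # integer revision
    #   rl.lookup('5')          # str(revision)
    #   rl.lookup('1e4f2a8b')   # unambiguous hex nodeid prefix
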
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

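    # Illustrative sketch (not from the original source): 'shortest' is the
    # inverse convenience of prefix lookup, assuming node 'n' exists:
    #
    #   prefix = rl.shortest(n, minlength=4)
    #   assert rl.lookup(prefix) == n   # prefix is unambiguous by construction
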
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

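    # Illustrative sketch (not from the original source): worked example of
    # the window arithmetic above, assuming a power-of-two cache size of
    # 65536 and a 100-byte read at offset 70000:
    #
    #   realoffset = 70000 & ~(65536 - 1)                     # == 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 # == 65536
    #
    # so one aligned 64KiB read covers offsets 65536..131071, which contains
    # the requested 70000..70099 range plus data on both sides.
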
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

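    # Illustrative sketch (not from the original source): the two delta
    # regimes, for some rev 'r' that is not a full snapshot:
    #
    #   rl.deltaparent(r) == r - 1   # classic revlog: delta against the
    #                                # previous revision
    #   rl.deltaparent(r)            # generaldelta: whatever base was chosen
    #                                # at write time (often a parent of r)
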
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

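    # Illustrative sketch (not from the original source), assuming 'n' names
    # a revision carrying an extra flag (e.g. censored or externally stored):
    #
    #   rl.revision(n)            # fulltext after flag processors have run
    #   rl.revision(n, raw=True)  # stored rawtext, e.g. for changegroups
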
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

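    # Illustrative sketch (not from the original source): each registered
    # processor is a (readtransform, writetransform, rawtransform) triple.
    # A hypothetical no-op processor for REVIDX_EXTSTORED could look like:
    #
    #   def _read(rl, text):
    #       return text, True    # (possibly transformed text, vhash)
    #   def _write(rl, text):
    #       return text, True
    #   def _raw(rl, text):
    #       return True          # vhash only; text passes through untouched
    #   revlog.addflagprocessor(REVIDX_EXTSTORED, (_read, _write, _raw))
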
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

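    # Illustrative sketch of the threshold test above (``_maxinline`` is the
    # module-level inline size cap; ``rl`` is a hypothetical open revlog and
    # ``tr`` an active transaction):
    #
    #   tiprev = len(rl) - 1
    #   if rl._inline and rl.start(tiprev) + rl.length(tiprev) >= _maxinline:
    #       # data outgrew the combined .i file: split it into .i + .d
    #       rl._enforceinlinesize(tr)
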
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

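    # Hypothetical usage sketch (``rl`` is an open revlog and ``tr`` an
    # active transaction; linkrev 0 is assumed for brevity):
    #
    #   node = rl.addrevision(b'file content', tr, 0, nullid, nullid)
    #   assert rl.revision(node) == b'file content'
    #   # re-adding identical content is a no-op returning the same node
    #   assert rl.addrevision(b'file content', tr, 0, nullid, nullid) == node
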
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

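    # Round-trip sketch: compress() returns a (header, data) pair whose
    # concatenation is what gets stored, and decompress() dispatches on the
    # first byte of the stored chunk ('x' = zlib, 'u' = stored verbatim,
    # '\0' = raw data that needs no marker). Hypothetical usage:
    #
    #   header, data = rl.compress(b'some revision text')
    #   chunk = header + data            # bytes as written to .i/.d
    #   assert bytes(rl.decompress(chunk)) == b'some revision text'
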
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

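    # For reference, the 8-tuple appended to self.index above is laid out as:
    #
    #   (offset_type(offset, flags),  # data offset combined with flag bits
    #    deltainfo.deltalen,          # length of the stored (delta) chunk
    #    textlen,                     # uncompressed rawtext length
    #    deltainfo.base,              # revision the delta chains from
    #    link,                        # linkrev into the changelog
    #    p1r, p2r,                    # parent revision numbers
    #    node)                        # 20-byte node id
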
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

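    # Each item yielded by ``deltas`` above is a 7-tuple; a hypothetical
    # producer would emit entries shaped like:
    #
    #   (node,       # node id of the revision being added
    #    p1, p2,     # parent node ids
    #    linknode,   # changelog node this revision links to
    #    deltabase,  # node the delta was computed against
    #    delta,      # the binary delta itself
    #    flags)      # revision flags, or 0 for the defaults
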
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

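    # The truncation offsets above, illustrated (a sketch; ``rl`` is a
    # hypothetical open revlog and ``rev`` the first revision to drop):
    #
    #   entrysize = rl._io.size            # 64 bytes per entry in revlog v1
    #   if rl._inline:
    #       # index and data share the .i file, so both terms contribute
    #       index_cut = rl.start(rev) + rev * entrysize
    #   else:
    #       data_cut = rl.start(rev)       # truncate .d here
    #       index_cut = rev * entrysize    # truncate .i here
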
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
                dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If None, the destination revlog's existing
        setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

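    # Hypothetical usage sketch (``src`` and ``dst`` are revlogs over the
    # same opener, ``tr`` an active transaction); force every delta to be
    # recomputed in the destination:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #   assert len(dst) == len(src)
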
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex(self.version, None)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

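    # Consumption sketch: verifyintegrity() is a generator, so a hypothetical
    # verifier drives it like this (``state`` keys match the checks above;
    # ``ui`` is an assumed output object):
    #
    #   state = {'expectedversion': 1, 'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       ui.warn(problem.error or problem.warning)
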
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
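
    # Shape of the dictionary returned above, with illustrative values:
    #
    #   rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # -> {'revisionscount': 42, 'trackedsize': 12345}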