revlog: rename v to versionflags...
Gregory Szorc
r41237:6439cefa default
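The rename clarifies what the value actually carries: not just a revlog version number, but the format version in the low 16 bits and feature flags in the high bits. A minimal sketch of that packing (editor's illustration; values assume the historical constants REVLOGV1 == 1 and FLAG_INLINE_DATA == 1 << 16):

    versionflags = REVLOGV1 | FLAG_INLINE_DATA  # 0x00010001
    fmt = versionflags & 0xFFFF                 # 1, the format version
    flags = versionflags & ~0xFFFF              # 0x00010000, the feature flags

These are exactly the masks _loadindex() applies in the diff below.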
@@ -1,2618 +1,2618 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__ # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns a same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

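# Editor's sketch (not part of the original change): a hypothetical
# registration matching the 3-tuple contract documented above. The helper
# names are illustrative only; REVIDX_EXTSTORED is used purely as an
# example flag.
#
#     def _exampleread(self, rawtext):
#         return rawtext, True    # returned text is usable for hash checks
#
#     def _examplewrite(self, text):
#         return text, False      # hash the user-visible text instead
#
#     def _exampleraw(self, rawtext):
#         return False            # same boolean as "write"
#
#     addflagprocessor(REVIDX_EXTSTORED,
#                      (_exampleread, _examplewrite, _exampleraw))
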
def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
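
# Editor's sketch: a round-trip of the helpers above. The offset lives in
# the high bits and the low 16 bits carry per-revision flags, so:
#
#     q = offset_type(1048576, REVIDX_ISCENSORED)
#     assert getoffset(q) == 1048576
#     assert gettype(q) == REVIDX_ISCENSORED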

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
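
# Editor's note: ">4l20s20s20s" packs to a fixed 4 * 4 + 3 * 20 == 76 bytes,
# which is the entry size parseindex() below steps through via self.size:
#
#     assert indexformatv0.size == 76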

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
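
# Editor's note: ">Qiiiiii20s12x" packs to a fixed
# 8 + 6 * 4 + 20 + 12 == 64 bytes per entry; the 4-byte versionformat is
# what packentry() below splices over the start of the first entry:
#
#     assert indexformatng.size == 64
#     assert versionformat.size == 4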

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p
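
# Editor's note: splicing the version header over the first 4 bytes is safe
# because revision 0 always starts at data offset 0, so those bytes of its
# offset field are zero anyway; the parser treats the first entry's offset
# as 0 when reading the index back.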

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        mmapindexthreshold = None
        opts = getattr(opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            # version 2 revlogs always use generaldelta.
-            v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
+            versionflags = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
-            v = REVLOGV1 | FLAG_INLINE_DATA
+            versionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
-                v |= FLAG_GENERALDELTA
+                versionflags |= FLAG_GENERALDELTA
        else:
-            v = REVLOG_DEFAULT_VERSION
+            versionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

-        self._loadindex(v, mmapindexthreshold)
+        self._loadindex(versionflags, mmapindexthreshold)

-    def _loadindex(self, v, mmapindexthreshold):
+    def _loadindex(self, versionflags, mmapindexthreshold):
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
-                v = versionformat_unpack(indexdata[:4])[0]
+                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

-        self.version = v
-        self._inline = v & FLAG_INLINE_DATA
-        self._generaldelta = v & FLAG_GENERALDELTA
-        flags = v & ~0xFFFF
-        fmt = v & 0xFFFF
+        self.version = versionflags
+        self._inline = versionflags & FLAG_INLINE_DATA
+        self._generaldelta = versionflags & FLAG_GENERALDELTA
+        flags = versionflags & ~0xFFFF
+        fmt = versionflags & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
775
775
776 def ancestors(self, revs, stoprev=0, inclusive=False):
776 def ancestors(self, revs, stoprev=0, inclusive=False):
777 """Generate the ancestors of 'revs' in reverse revision order.
777 """Generate the ancestors of 'revs' in reverse revision order.
778 Does not generate revs lower than stoprev.
778 Does not generate revs lower than stoprev.
779
779
780 See the documentation for ancestor.lazyancestors for more details."""
780 See the documentation for ancestor.lazyancestors for more details."""
781
781
782 # first, make sure start revisions aren't filtered
782 # first, make sure start revisions aren't filtered
783 revs = list(revs)
783 revs = list(revs)
784 checkrev = self.node
784 checkrev = self.node
785 for r in revs:
785 for r in revs:
786 checkrev(r)
786 checkrev(r)
787 # and we're sure ancestors aren't filtered as well
787 # and we're sure ancestors aren't filtered as well
788
788
789 if rustext is not None:
789 if rustext is not None:
790 lazyancestors = rustext.ancestor.LazyAncestors
790 lazyancestors = rustext.ancestor.LazyAncestors
791 arg = self.index
791 arg = self.index
792 elif util.safehasattr(parsers, 'rustlazyancestors'):
792 elif util.safehasattr(parsers, 'rustlazyancestors'):
793 lazyancestors = ancestor.rustlazyancestors
793 lazyancestors = ancestor.rustlazyancestors
794 arg = self.index
794 arg = self.index
795 else:
795 else:
796 lazyancestors = ancestor.lazyancestors
796 lazyancestors = ancestor.lazyancestors
797 arg = self._uncheckedparentrevs
797 arg = self._uncheckedparentrevs
798 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
798 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
799
799
800 def descendants(self, revs):
800 def descendants(self, revs):
801 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
801 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
802
802
803 def findcommonmissing(self, common=None, heads=None):
803 def findcommonmissing(self, common=None, heads=None):
804 """Return a tuple of the ancestors of common and the ancestors of heads
804 """Return a tuple of the ancestors of common and the ancestors of heads
805 that are not ancestors of common. In revset terminology, we return the
805 that are not ancestors of common. In revset terminology, we return the
806 tuple:
806 tuple:
807
807
808 ::common, (::heads) - (::common)
808 ::common, (::heads) - (::common)
809
809
810 The list is sorted by revision number, meaning it is
810 The list is sorted by revision number, meaning it is
811 topologically sorted.
811 topologically sorted.
812
812
813 'heads' and 'common' are both lists of node IDs. If heads is
813 'heads' and 'common' are both lists of node IDs. If heads is
814 not supplied, uses all of the revlog's heads. If common is not
814 not supplied, uses all of the revlog's heads. If common is not
815 supplied, uses nullid."""
815 supplied, uses nullid."""
816 if common is None:
816 if common is None:
817 common = [nullid]
817 common = [nullid]
818 if heads is None:
818 if heads is None:
819 heads = self.heads()
819 heads = self.heads()
820
820
821 common = [self.rev(n) for n in common]
821 common = [self.rev(n) for n in common]
822 heads = [self.rev(n) for n in heads]
822 heads = [self.rev(n) for n in heads]
823
823
824 # we want the ancestors, but inclusive
824 # we want the ancestors, but inclusive
825 class lazyset(object):
825 class lazyset(object):
826 def __init__(self, lazyvalues):
826 def __init__(self, lazyvalues):
827 self.addedvalues = set()
827 self.addedvalues = set()
828 self.lazyvalues = lazyvalues
828 self.lazyvalues = lazyvalues
829
829
830 def __contains__(self, value):
830 def __contains__(self, value):
831 return value in self.addedvalues or value in self.lazyvalues
831 return value in self.addedvalues or value in self.lazyvalues
832
832
833 def __iter__(self):
833 def __iter__(self):
834 added = self.addedvalues
834 added = self.addedvalues
835 for r in added:
835 for r in added:
836 yield r
836 yield r
837 for r in self.lazyvalues:
837 for r in self.lazyvalues:
838 if not r in added:
838 if not r in added:
839 yield r
839 yield r
840
840
841 def add(self, value):
841 def add(self, value):
842 self.addedvalues.add(value)
842 self.addedvalues.add(value)
843
843
844 def update(self, values):
844 def update(self, values):
845 self.addedvalues.update(values)
845 self.addedvalues.update(values)
846
846
847 has = lazyset(self.ancestors(common))
847 has = lazyset(self.ancestors(common))
848 has.add(nullrev)
848 has.add(nullrev)
849 has.update(common)
849 has.update(common)
850
850
851 # take all ancestors from heads that aren't in has
851 # take all ancestors from heads that aren't in has
852 missing = set()
852 missing = set()
853 visit = collections.deque(r for r in heads if r not in has)
853 visit = collections.deque(r for r in heads if r not in has)
854 while visit:
854 while visit:
855 r = visit.popleft()
855 r = visit.popleft()
856 if r in missing:
856 if r in missing:
857 continue
857 continue
858 else:
858 else:
859 missing.add(r)
859 missing.add(r)
860 for p in self.parentrevs(r):
860 for p in self.parentrevs(r):
861 if p not in has:
861 if p not in has:
862 visit.append(p)
862 visit.append(p)
863 missing = list(missing)
863 missing = list(missing)
864 missing.sort()
864 missing.sort()
865 return has, [self.node(miss) for miss in missing]
865 return has, [self.node(miss) for miss in missing]
866
866
867 def incrementalmissingrevs(self, common=None):
867 def incrementalmissingrevs(self, common=None):
868 """Return an object that can be used to incrementally compute the
868 """Return an object that can be used to incrementally compute the
869 revision numbers of the ancestors of arbitrary sets that are not
869 revision numbers of the ancestors of arbitrary sets that are not
870 ancestors of common. This is an ancestor.incrementalmissingancestors
870 ancestors of common. This is an ancestor.incrementalmissingancestors
871 object.
871 object.
872
872
873 'common' is a list of revision numbers. If common is not supplied, uses
873 'common' is a list of revision numbers. If common is not supplied, uses
874 nullrev.
874 nullrev.
875 """
875 """
876 if common is None:
876 if common is None:
877 common = [nullrev]
877 common = [nullrev]
878
878
879 return ancestor.incrementalmissingancestors(self.parentrevs, common)
879 return ancestor.incrementalmissingancestors(self.parentrevs, common)
880
880
881 def findmissingrevs(self, common=None, heads=None):
881 def findmissingrevs(self, common=None, heads=None):
882 """Return the revision numbers of the ancestors of heads that
882 """Return the revision numbers of the ancestors of heads that
883 are not ancestors of common.
883 are not ancestors of common.
884
884
885 More specifically, return a list of revision numbers corresponding to
885 More specifically, return a list of revision numbers corresponding to
886 nodes N such that every N satisfies the following constraints:
886 nodes N such that every N satisfies the following constraints:
887
887
888 1. N is an ancestor of some node in 'heads'
888 1. N is an ancestor of some node in 'heads'
889 2. N is not an ancestor of any node in 'common'
889 2. N is not an ancestor of any node in 'common'
890
890
891 The list is sorted by revision number, meaning it is
891 The list is sorted by revision number, meaning it is
892 topologically sorted.
892 topologically sorted.
893
893
894 'heads' and 'common' are both lists of revision numbers. If heads is
894 'heads' and 'common' are both lists of revision numbers. If heads is
895 not supplied, uses all of the revlog's heads. If common is not
895 not supplied, uses all of the revlog's heads. If common is not
896 supplied, uses nullid."""
896 supplied, uses nullid."""
897 if common is None:
897 if common is None:
898 common = [nullrev]
898 common = [nullrev]
899 if heads is None:
899 if heads is None:
900 heads = self.headrevs()
900 heads = self.headrevs()
901
901
902 inc = self.incrementalmissingrevs(common=common)
902 inc = self.incrementalmissingrevs(common=common)
903 return inc.missingancestors(heads)
903 return inc.missingancestors(heads)
904
904
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

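    # Sketch of the node/rev round-trip above (hypothetical nodes n1, n2, h):
    #
    #   nodes = rlog.findmissing(common=[n1, n2], heads=[h])
    #
    # is equivalent to mapping the inputs through rev(), running
    # findmissingrevs(), and mapping the result back through node() --
    # the two methods differ only in whether they speak node IDs or
    # revision numbers.
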
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

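    # Worked sketch on a tiny DAG (hypothetical nodes; n0 -> n1 -> n2 and
    # n0 -> n3):
    #
    #   nodes, outroots, outheads = rlog.nodesbetween(roots=[n0], heads=[n2])
    #
    # would yield nodes == [n0, n1, n2]: n3 is a descendant of the root but
    # not an ancestor of any requested head, so it is excluded, while
    # outroots/outheads report which of the requested endpoints were
    # actually reachable.
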
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

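    # Illustrative trace of the marking scheme above (hypothetical shape
    # 0 -> 1, 0 -> 2): every rev first marks itself as a head, then each rev
    # clears its parents' slots via the e[5]/e[6] parent fields, so only
    # revs 1 and 2 keep ishead == 1. The extra slot at index 'count' absorbs
    # nullrev (-1) parent writes without an IndexError.
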
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

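    # Usage sketch (names hypothetical): rlog.heads() takes the fast
    # headrevs() path, while a bounded query such as
    #
    #   subheads = rlog.heads(start=somenode, stop=[cutoffnode])
    #
    # restricts the answer to heads descending from 'somenode', with nodes
    # in 'stop' treated as childless cut-off points per the docstring above.
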
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

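    # Why the final membership test works: if a is an ancestor of b, the
    # common ancestors of {a, b} are exactly the ancestors of a, whose
    # unique head is a itself, so a appears in the result. The a > b
    # short-circuit is safe because revision numbers are topologically
    # ordered: an ancestor can never have a larger rev than its descendant.
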
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

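    # Accepted identifier forms, sketched (values hypothetical):
    #
    #   rlog.lookup(5)            # revision number
    #   rlog.lookup('-1')         # str(rev), negative offsets included
    #   rlog.lookup(node)         # 20-byte binary node
    #   rlog.lookup('1de3a4f2')   # unambiguous hex prefix
    #
    # Ambiguous prefixes raise AmbiguousPrefixLookupError; anything else
    # that fails to resolve raises LookupError.
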
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

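    # Sketch: with two stored nodes whose hex forms are 1de3a4... and
    # 1d94bb... (hypothetical), shortest() returns '1de' for the first --
    # '1d' is shared, so one more digit is needed -- and disambiguate()
    # extends the prefix further only while it is all 'f's and could be
    # mistaken for the working-directory pseudo-id.
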
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

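    # Worked example of the window arithmetic above, assuming a
    # power-of-two cachesize of 65536: a request for offset=70000,
    # length=1000 gives realoffset = 70000 & ~65535 = 65536 and
    # reallength = ((71000 + 65536) & ~65535) - 65536 = 65536, i.e. one
    # aligned window that covers the request with slack on both sides for
    # neighboring reads.
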
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

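    # The inline adjustment above accounts for index entries interleaved
    # with data: in an inline revlog, rev r's data is preceded by the index
    # records for revs 0..r, i.e. (r + 1) fixed-size entries. Assuming the
    # 64-byte v1 entry size, data for rev 2 at logical offset 100 really
    # lives at byte 100 + 3 * 64 of the .i file.
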
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

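    # Interpretation of the base field above: with generaldelta, the index
    # stores the actual delta parent, so a delta chain can follow the DAG
    # (e.g. rev 7 deltas against its DAG parent rev 3). Without it, deltas
    # are always against the previous revision, so the chain for rev 7 is
    # 7 -> 6 -> 5 -> ... back to the last full snapshot.
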
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

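    # Reconstruction sketch for the delta-chain walk above: for a chain
    # [4, 6, 9] targeting rev 9, bins[0] is the full text of snapshot
    # rev 4 and the remaining entries are binary deltas, so
    # mdiff.patches(base, deltas) replays them in order to produce rev 9's
    # rawtext; flag processors (if any) then turn rawtext into text.
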
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

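    # A flag processor is a (readtransform, writetransform, rawtransform)
    # triple, as unpacked above. Hedged sketch of the expected shapes (the
    # flag value and transforms are hypothetical, not a registered
    # Mercurial feature):
    #
    #   def _readupper(rl, text):
    #       return text.upper(), True        # (text, validatehash)
    #   def _writeupper(rl, text):
    #       return text.lower(), True
    #   def _rawupper(rl, text):
    #       return True                      # rawtext is hashable as-is
    #
    #   self._flagprocessors[SOMEFLAG] = (_readupper, _writeupper, _rawupper)
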
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

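    # Layout sketch of the conversion above: an inline revlog interleaves
    # index entries and revision chunks in the '.i' file; after conversion
    # the same revisions live in two files (paths illustrative):
    #
    #   before: data/foo.txt.i   (index entries + revision data interleaved)
    #   after:  data/foo.txt.i   (index entries only)
    #           data/foo.txt.d   (revision data chunks)
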
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

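    # Usage sketch (hypothetical: ``rl`` is a revlog instance, ``tr`` an open
    # transaction, ``p1``/``p2`` parent nodes already stored in ``rl``):
    #
    #   node = rl.addrevision(b'file content', tr, linkrev, p1, p2)
    #   assert node in rl.nodemap
    #   # adding identical (text, p1, p2) again is a no-op that returns the
    #   # existing node, per the nodemap check above
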
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

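    # Header convention sketch: compress() returns a (header, data) pair and
    # callers store the concatenation, so the first stored byte must identify
    # the encoding unambiguously:
    #
    #   ('', b'x\x9c...')  zlib output; its leading 'x' is self-identifying
    #   ('', b'\0...')     raw data starting with '\0', stored as-is
    #   ('u', b'hello')    other raw data, prefixed with 'u' to mark it
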
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

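    # Round-trip sketch (hypothetical revlog instance ``rl``): the header
    # emitted by compress() is exactly what decompress() dispatches on.
    #
    #   header, packed = rl.compress(b'some revision data')
    #   restored = rl.decompress(header + packed)
    #   # ``restored`` holds the original b'some revision data'
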
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

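    # Layout sketch of the index entry tuple ``e`` built above:
    #
    #   e[0]       offset_type(): 48-bit file offset combined with 16 flag bits
    #   e[1]       deltalen: size of the stored (possibly deltified) chunk
    #   e[2]       textlen: size of the full rawtext
    #   e[3]       base: revision this delta chains to
    #   e[4]       link: linkrev into the changelog
    #   e[5]/e[6]  parent revision numbers
    #   e[7]       node: the revision's nodeid
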
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

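    # Feeding sketch (all names hypothetical): each element of ``deltas`` is
    # the 7-tuple unpacked in the loop above, and ``cl.rev`` stands in for a
    # linkmapper that turns a linknode into a changelog revision number.
    #
    #   seen = []
    #   def oncommit(rl, node):
    #       seen.append(node)
    #
    #   deltas = [(node, p1, p2, linknode, deltabase, delta, 0)]
    #   added = rl.addgroup(deltas, cl.rev, tr, addrevisioncb=oncommit)
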
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

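    # Worked sketch: for a revlog whose revs 0..3 carry linkrevs
    # [0, 1, 5, 3], getstrippoint(4) would report strip point 2 (the first
    # rev whose linkrev is >= 4) and flag rev 3 as broken, since truncating
    # at rev 2 removes it even though its linkrev 3 is below the threshold.
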
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

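    # Truncation arithmetic sketch for a version 1 revlog (64-byte index
    # entries): stripping at rev N cuts a separate index to N * 64 bytes,
    # while an inline index is cut at start(N) + N * 64, because the data
    # chunks of revs < N are interleaved with their entries.
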
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If not set, the destination revlog's existing
        setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

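    # Usage sketch (hypothetical: ``src`` and ``dst`` are revlogs over the
    # same opener, ``tr`` an open transaction, ``dst`` still empty):
    #
    #   src.clone(tr, dst, deltareuse=revlog.DELTAREUSENEVER)
    #   assert len(dst) == len(src)   # same revisions, deltas recomputed
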
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex(self.version, None)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            # -------------------------------------------------------
            # flags()               | 0      | 0      | 0     | not 0
            # renamed()             | False  | True   | False | ?
            # rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

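    # Consumption sketch (``state`` keys match the lookups above; the
    # reporting is illustrative, not part of this module):
    #
    #   state = {'expectedversion': 1, 'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       print(problem.error or problem.warning)
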
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
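
    # Usage sketch (hypothetical revlog instance ``rl``): only the requested
    # keys are populated in the returned dict.
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   # e.g. {'revisionscount': 42, 'storedsize': 131072}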