py3: use util.forcebytestr() to convert error messages to bytes...
Pulkit Goyal
r39944:d70e620e default
@@ -1,2699 +1,2700 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
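# (policy.importmod selects the C implementation of the index parser when it
# is available, falling back to the pure-Python version otherwise.)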

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576
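# (131072 bytes is 128 KiB of inline data; 1048576 bytes is a 1 MiB read chunk)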

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw) f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans, and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

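# Illustrative sketch (not part of the original module): how an extension
# might use addflagprocessor(). Only REVIDX_EXTSTORED and the 3-tuple
# protocol documented above are real; the helper names here are hypothetical
# identity transforms whose booleans say "this text is safe for hashing".
def _exampleregisterprocessor():
    def _exampleread(rl, rawtext):
        return rawtext, True
    def _examplewrite(rl, text):
        return text, True
    def _exampleraw(rl, rawtext):
        return True
    addflagprocessor(REVIDX_EXTSTORED,
                     (_exampleread, _examplewrite, _exampleraw))
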
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

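# Worked example (illustrative, not in the original source): the first entry
# of an index tuple packs the data-file offset into the high 48 bits and the
# per-revision flags into the low 16 bits, so the round trip below holds.
def _exampleoffsettype():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
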
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

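# Illustrative sketch (not in the original source): a v0 index record is
# indexformatv0.size == 76 bytes (four big-endian longs plus three 20-byte
# nodeids), so a synthetic null entry round-trips through pack/unpack.
def _examplev0entry():
    entry = indexformatv0_pack(0, 0, 0, -1, nullid, nullid, nullid)
    assert len(entry) == indexformatv0.size == 76
    offset, clen, base, link, p1, p2, node = indexformatv0_unpack(entry)
    assert (link, node) == (-1, nullid)
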
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

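# Illustrative sketch (not in the original source): an "ng" index record is
# indexformatng.size == 64 bytes; the leading quad packs offset and flags as
# in offset_type() above, and the 20-byte nodeid is zero-padded to 32 bytes
# by the trailing "12x".
def _examplengentry():
    entry = (offset_type(0, 0), 11, 17, 0, 0, nullrev, nullrev, nullid)
    packed = indexformatng_pack(*entry)
    assert len(packed) == indexformatng.size == 64
    # for the first revision, the leading 4 bytes get overlaid with the
    # revlog version header (see revlogio.packentry below)
    header = versionformat_pack(REVLOG_DEFAULT_VERSION) + packed[4:]
    assert len(header) == 64
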
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
                if len(indexdata) > 0:
                    v = versionformat_unpack(indexdata[:4])[0]
                    self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
517 """iterate over all rev in this revlog (from start to stop)"""
517 """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
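            # (rev() caches every node it scans past on its pure-Python
            # fallback path, so resolving the first node fills _nodecache
            # for the whole index.)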
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

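    # Illustrative example (not in the original source): in a generaldelta
    # revlog where rev 5 deltas against rev 3 and rev 3 against base rev 1,
    # _deltachain(5) returns ([1, 3, 5], False), while
    # _deltachain(5, stoprev=3) returns ([5], True).
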
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
807 yield r
807 yield r
808
808
809 def add(self, value):
809 def add(self, value):
810 self.addedvalues.add(value)
810 self.addedvalues.add(value)
811
811
812 def update(self, values):
812 def update(self, values):
813 self.addedvalues.update(values)
813 self.addedvalues.update(values)
814
814
815 has = lazyset(self.ancestors(common))
815 has = lazyset(self.ancestors(common))
816 has.add(nullrev)
816 has.add(nullrev)
817 has.update(common)
817 has.update(common)
818
818
819 # take all ancestors from heads that aren't in has
819 # take all ancestors from heads that aren't in has
820 missing = set()
820 missing = set()
821 visit = collections.deque(r for r in heads if r not in has)
821 visit = collections.deque(r for r in heads if r not in has)
822 while visit:
822 while visit:
823 r = visit.popleft()
823 r = visit.popleft()
824 if r in missing:
824 if r in missing:
825 continue
825 continue
826 else:
826 else:
827 missing.add(r)
827 missing.add(r)
828 for p in self.parentrevs(r):
828 for p in self.parentrevs(r):
829 if p not in has:
829 if p not in has:
830 visit.append(p)
830 visit.append(p)
831 missing = list(missing)
831 missing = list(missing)
832 missing.sort()
832 missing.sort()
833 return has, [self.node(miss) for miss in missing]
833 return has, [self.node(miss) for miss in missing]
834
834
835 def incrementalmissingrevs(self, common=None):
835 def incrementalmissingrevs(self, common=None):
836 """Return an object that can be used to incrementally compute the
836 """Return an object that can be used to incrementally compute the
837 revision numbers of the ancestors of arbitrary sets that are not
837 revision numbers of the ancestors of arbitrary sets that are not
838 ancestors of common. This is an ancestor.incrementalmissingancestors
838 ancestors of common. This is an ancestor.incrementalmissingancestors
839 object.
839 object.
840
840
841 'common' is a list of revision numbers. If common is not supplied, uses
841 'common' is a list of revision numbers. If common is not supplied, uses
842 nullrev.
842 nullrev.
843 """
843 """
844 if common is None:
844 if common is None:
845 common = [nullrev]
845 common = [nullrev]
846
846
847 return ancestor.incrementalmissingancestors(self.parentrevs, common)
847 return ancestor.incrementalmissingancestors(self.parentrevs, common)
848
848
849 def findmissingrevs(self, common=None, heads=None):
849 def findmissingrevs(self, common=None, heads=None):
850 """Return the revision numbers of the ancestors of heads that
850 """Return the revision numbers of the ancestors of heads that
851 are not ancestors of common.
851 are not ancestors of common.
852
852
853 More specifically, return a list of revision numbers corresponding to
853 More specifically, return a list of revision numbers corresponding to
854 nodes N such that every N satisfies the following constraints:
854 nodes N such that every N satisfies the following constraints:
855
855
856 1. N is an ancestor of some node in 'heads'
856 1. N is an ancestor of some node in 'heads'
857 2. N is not an ancestor of any node in 'common'
857 2. N is not an ancestor of any node in 'common'
858
858
859 The list is sorted by revision number, meaning it is
859 The list is sorted by revision number, meaning it is
860 topologically sorted.
860 topologically sorted.
861
861
862 'heads' and 'common' are both lists of revision numbers. If heads is
862 'heads' and 'common' are both lists of revision numbers. If heads is
863 not supplied, uses all of the revlog's heads. If common is not
863 not supplied, uses all of the revlog's heads. If common is not
864 supplied, uses nullid."""
864 supplied, uses nullid."""
865 if common is None:
865 if common is None:
866 common = [nullrev]
866 common = [nullrev]
867 if heads is None:
867 if heads is None:
868 heads = self.headrevs()
868 heads = self.headrevs()
869
869
870 inc = self.incrementalmissingrevs(common=common)
870 inc = self.incrementalmissingrevs(common=common)
871 return inc.missingancestors(heads)
871 return inc.missingancestors(heads)
872
872
873 def findmissing(self, common=None, heads=None):
873 def findmissing(self, common=None, heads=None):
874 """Return the ancestors of heads that are not ancestors of common.
874 """Return the ancestors of heads that are not ancestors of common.
875
875
876 More specifically, return a list of nodes N such that every N
876 More specifically, return a list of nodes N such that every N
877 satisfies the following constraints:
877 satisfies the following constraints:
878
878
879 1. N is an ancestor of some node in 'heads'
879 1. N is an ancestor of some node in 'heads'
880 2. N is not an ancestor of any node in 'common'
880 2. N is not an ancestor of any node in 'common'
881
881
882 The list is sorted by revision number, meaning it is
882 The list is sorted by revision number, meaning it is
883 topologically sorted.
883 topologically sorted.
884
884
885 'heads' and 'common' are both lists of node IDs. If heads is
885 'heads' and 'common' are both lists of node IDs. If heads is
886 not supplied, uses all of the revlog's heads. If common is not
886 not supplied, uses all of the revlog's heads. If common is not
887 supplied, uses nullid."""
887 supplied, uses nullid."""
888 if common is None:
888 if common is None:
889 common = [nullid]
889 common = [nullid]
890 if heads is None:
890 if heads is None:
891 heads = self.heads()
891 heads = self.heads()
892
892
893 common = [self.rev(n) for n in common]
893 common = [self.rev(n) for n in common]
894 heads = [self.rev(n) for n in heads]
894 heads = [self.rev(n) for n in heads]
895
895
896 inc = self.incrementalmissingrevs(common=common)
896 inc = self.incrementalmissingrevs(common=common)
897 return [self.node(r) for r in inc.missingancestors(heads)]
897 return [self.node(r) for r in inc.missingancestors(heads)]
898
898
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

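    # Illustrative sketch of the return shape above: the 'outroots' and
    # 'outheads' components tell the caller which of its inputs were actually
    # reachable. 'rlog', 'r1' and 'h1' are hypothetical names:
    #
    #     nodes, outroots, outheads = rlog.nodesbetween([r1], [h1])
    #     # 'nodes' walks from r1 toward h1 in topological order; if r1 is
    #     # not an ancestor of h1, all three lists come back empty.
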
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

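    # Worked example of the scan in _headrevs(): for revs 0 <- 1 <- 2 with a
    # second child 1 <- 3, each rev marks itself (ishead[r] = 1) and clears
    # its parents (index entries e[5] and e[6]). Rev 1 clears 0, revs 2 and 3
    # both clear 1, so only [2, 3] survive. The list is one slot longer than
    # the revlog because nullrev (-1) harmlessly indexes the extra trailing
    # slot.
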
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

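    # Illustrative calls (hypothetical 'rlog' and nodes 'n', 'm'): 'start'
    # restricts the walk to descendants, while 'stop' nodes are treated as
    # childless cut points:
    #
    #     rlog.heads()                  # every head in the revlog
    #     rlog.heads(start=n)           # heads that descend from n
    #     rlog.heads(start=n, stop=[m]) # don't explore past m
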
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

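    # The early returns above are worth spelling out: nullrev precedes
    # everything, any rev is its own ancestor, and since revision numbers are
    # a topological order, a > b means a cannot be b's ancestor. Only the
    # remaining case pays for a common-ancestor computation, e.g.
    # (hypothetical revs):
    #
    #     rlog.isancestorrev(2, 7)  # True iff 2 is among the heads of the
    #                               # common ancestors of 2 and 7
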
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

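    # Usage sketch (hypothetical 'rlog' and binary node 'n'):
    #
    #     prefix = rlog.shortest(n, minlength=4)
    #     # 'prefix' is the shortest hex prefix of n, at least 4 digits,
    #     # that _partialmatch() resolves unambiguously; all-'f' prefixes
    #     # are lengthened so they cannot collide with the wdir id.
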
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
            self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

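    # Worked example of the window arithmetic above, assuming a power-of-two
    # _chunkcachesize of 65536: a request for offset=70000, length=1000
    # rounds down to realoffset = 70000 & ~65535 = 65536, and
    #
    #     reallength = ((70000 + 1000 + 65536) & ~65535) - 65536 = 65536
    #
    # so one aligned 64k window is read and cached, and the caller receives a
    # zero-copy buffer view of its 1000 bytes at offset 70000 - 65536 = 4464
    # within it.
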
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

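    # The inline adjustment above accounts for index entries being
    # interleaved with data in a single .i file: the data for revision r is
    # preceded by r + 1 fixed-size index entries. Illustratively, with a
    # 64-byte entry size (the v1 format), rev 0's data starts at
    #
    #     start(0) + (0 + 1) * 64
    #
    # bytes into the file rather than at start(0) itself.
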
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

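    # Illustration of the two layouts handled above: with generaldelta the
    # index records the actual delta base, so 'base' is returned directly;
    # in the legacy layout every delta applies against the previous revision
    # on disk, hence rev - 1. base == rev is the sentinel meaning "full text
    # stored here", reported as nullrev.
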
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

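    # Example of the recursion in issnapshot(): a rev whose delta parent is
    # nullrev stores full text (a level-0 snapshot); a rev whose delta is
    # built against such a snapshot, but not against one of its own parents,
    # is an intermediate snapshot. A delta against p1 or p2 is an ordinary
    # delta and never a snapshot, which is why the p1/p2 check returns False
    # before recursing.
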
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

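    # Reconstruction sketch for the delta-chain path above: _deltachain()
    # yields [base, d1, d2, ..., rev]; the base chunk is the starting full
    # text and each later chunk is a binary patch, so conceptually
    #
    #     rawtext = mdiff.patches(bins[0], bins[1:])
    #
    # with the chain truncated early (stopped=True) when the cached rawtext
    # can serve as the starting point instead of the on-disk base.
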
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

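    # A minimal sketch of what the default (storageutil.hashrevisionsha1)
    # computes, assuming the classic Mercurial node definition: SHA-1 over
    # the two parent nodes in sorted order, followed by the text:
    #
    #     import hashlib
    #     def nodeid(text, p1, p2):
    #         s = hashlib.sha1(min(p1, p2))
    #         s.update(max(p1, p2))
    #         s.update(text)
    #         return s.digest()
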
    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

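    # Shape of a flag processor, as unpacked above: a 3-tuple of callables
    # (readtransform, writetransform, rawtransform). A hedged sketch of a
    # no-op processor (hypothetical names; real ones are registered against
    # a REVIDX_* flag elsewhere):
    #
    #     def _read(rlog, text):
    #         return text, True      # (text, validatehash)
    #     def _write(rlog, text):
    #         return text, True
    #     def _raw(rlog, text):
    #         return True            # only a validatehash bool
    #     processor = (_read, _write, _raw)
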
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                                        % (self.indexfile,
                                           pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

        # the temp file replaces the real index when we exit the context
        # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

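    # Hedged usage sketch (not part of revlog itself): how a caller holding an
    # open transaction might add a revision. ``rl``, ``tr``, ``linkrev``,
    # ``p1`` and ``p2`` are assumptions supplied by the caller; the node is
    # computed for us from (text, p1, p2).
    #
    #     node = rl.addrevision(b'contents of the file\n', tr, linkrev, p1, p2)
    #     assert node in rl.nodemap
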
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

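    # Illustrative sketch of the compress() contract, assuming a revlog ``rl``
    # with the default zlib compressor (not part of revlog itself): the chunk
    # written to storage is always header + data, where an empty header means
    # the compressor embedded its own marker (e.g. 'x' for zlib streams).
    #
    #     header, packed = rl.compress(b'x' * 1024)
    #     chunk = header + packed       # what ends up on disk
    #     assert rl.decompress(chunk) == b'x' * 1024
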
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

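    # A minimal sketch of the header-byte dispatch decompress() performs,
    # covering only the built-in markers (``classifychunk`` is a hypothetical
    # helper, not a revlog API):
    #
    #     def classifychunk(chunk):
    #         t = chunk[0:1]
    #         if t == 'x':
    #             return 'zlib'
    #         elif t == '\0':
    #             return 'raw inline data'
    #         elif t == 'u':
    #             return 'stored uncompressed (skip 1-byte header)'
    #         return 'engine-specific header %r' % t
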
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

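    # Sketch of the append-mode workaround described above (plain file
    # objects, outside any revlog; ``path`` and ``entry`` are assumptions):
    # always reposition to EOF before writing so that buggy a+ handles
    # cannot misplace the append after an interleaved read.
    #
    #     fh = open(path, 'a+b')
    #     fh.read(12)               # handle is now in "read" mode
    #     fh.seek(0, os.SEEK_END)   # force positioning before the write
    #     fh.write(entry)
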
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

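    # Hedged sketch of the delta tuples addgroup() consumes, as they would
    # arrive from a changegroup (all names here are illustrative assumptions,
    # not a revlog API; ``cl.rev`` stands in for any linknode -> linkrev
    # mapper):
    #
    #     deltas = [
    #         # (node, p1, p2, linknode, deltabase, delta, flags)
    #         (node1, p1a, p2a, linknode1, basenode1, delta1, 0),
    #         (node2, node1, nullid, linknode2, node1, delta2, 0),
    #     ]
    #     added = rl.addgroup(deltas, cl.rev, tr)
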
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

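    # Worked example for getstrippoint(), under the simplifying assumption of
    # a linear revlog whose linkrevs equal its revs (e.g. a changelog):
    # stripping from linkrev 5 truncates at rev 5, and nothing surviving the
    # walk has a broken linkrev.
    #
    #     strippoint, broken = rl.getstrippoint(5)
    #     # strippoint == 5; broken == set(), since every rev >= 5 is
    #     # removed and every rev < 5 has linkrev < 5
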
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

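    # Sketch of the truncation offsets strip() hands to the transaction,
    # assuming a non-inline revlog ``rl`` (illustrative only): the data file
    # keeps everything before the stripped rev's start offset, the index
    # keeps one fixed-size entry per surviving rev.
    #
    #     rev, _broken = rl.getstrippoint(minlink)
    #     dataend = rl.start(rev)          # .d file keeps bytes [0, dataend)
    #     indexend = rev * rl._io.size     # .i file keeps ``rev`` entries
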
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False):
        if nodesorder not in ('nodes', 'storage', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        frev = self.rev
        fnode = self.node

        if nodesorder == 'nodes':
            revs = [frev(n) for n in nodes]
        elif nodesorder == 'storage':
            revs = sorted(frev(n) for n in nodes)
        else:
            assert self._generaldelta
            revs = set(frev(n) for n in nodes)
            revs = dagop.linearize(revs, self.parentrevs)

        prevrev = None

        if deltaprevious or assumehaveparentrevisions:
            prevrev = self.parentrevs(revs[0])[0]

        # Set of revs available to delta against.
        available = set()

        for rev in revs:
            if rev == nullrev:
                continue

            node = fnode(rev)
            deltaparentrev = self.deltaparent(rev)
            p1rev, p2rev = self.parentrevs(rev)

            # Forced delta against previous mode.
            if deltaprevious:
                baserev = prevrev

            # Revlog is configured to use full snapshots. Stick to that.
            elif not self._storedeltachains:
                baserev = nullrev

            # There is a delta in storage. We try to use that because it
            # amounts to effectively copying data from storage and is
            # therefore the fastest.
            elif deltaparentrev != nullrev:
                # Base revision was already emitted in this group. We can
                # always safely use the delta.
                if deltaparentrev in available:
                    baserev = deltaparentrev

                # Base revision is a parent that hasn't been emitted already.
                # Use it if we can assume the receiver has the parent revision.
                elif (assumehaveparentrevisions
                      and deltaparentrev in (p1rev, p2rev)):
                    baserev = deltaparentrev

                # No guarantee the receiver has the delta parent. Send delta
                # against last revision (if possible), which in the common case
                # should be similar enough to this revision that the delta is
                # reasonable.
                elif prevrev is not None:
                    baserev = prevrev
                else:
                    baserev = nullrev

            # Storage has a fulltext revision.

            # Let's use the previous revision, which is as good a guess as any.
            # There is definitely room to improve this logic.
            elif prevrev is not None:
                baserev = prevrev
            else:
                baserev = nullrev

            # But we can't actually use our chosen delta base for whatever
            # reason. Reset to fulltext.
            if baserev != nullrev and not self.candelta(baserev, rev):
                baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if revisiondata:
                if self.iscensored(baserev) or self.iscensored(rev):
                    try:
                        revision = self.revision(node, raw=True)
                    except error.CensoredNodeError as e:
                        revision = e.tombstone

                    if baserev != nullrev:
                        baserevisionsize = self.rawsize(baserev)

                elif baserev == nullrev and not deltaprevious:
                    revision = self.revision(node, raw=True)
                    available.add(rev)
                else:
                    delta = self.revdiff(baserev, rev)
                    available.add(rev)

            yield revlogrevisiondelta(
                node=node,
                p1node=fnode(p1rev),
                p2node=fnode(p2rev),
                basenode=fnode(baserev),
                flags=self.flags(rev),
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        By default, the current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

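    # Hedged usage sketch for clone(): recomputing every delta while copying
    # into an empty destination revlog ``dest`` under transaction ``tr``
    # (``dest`` and ``tr`` are assumptions supplied by the caller):
    #
    #     rl.clone(tr, dest, deltareuse=rl.DELTAREUSENEVER)
    #     assert len(dest) == len(rl)
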
    def censorrevision(self, node, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        rev = self.rev(node)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(rev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Using two files instead of one makes it easy to rewrite entry-by-entry
        idxread = self.opener(self.indexfile, 'r')
        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
        if self.version & FLAG_INLINE_DATA:
            dataread, datawrite = idxread, idxwrite
        else:
            dataread = self.opener(self.datafile, 'r')
            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

        # Copy all revlog data up to the entry to be censored.
        offset = self.start(rev)

        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
            flags = gettype(offlags)
            if newdata:
                comp, uncomp, nflags = newdata
                flags |= nflags
            offlags = offset_type(newoffs, flags)
            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
            idxwrite.write(self._io.packentry(e, None, self.version, r))
            idxread.seek(self._io.size, 1)

        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
            """Write the given fulltext with the given data offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = self.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            dataread.seek(self.length(r), 1)
            return newcomp

        # Rewrite censored entry with (padded) tombstone data.
        pad = ' ' * (self.rawsize(rev) - len(tombstone))
        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

        # Rewrite all following filelog revisions fixing up offsets and deltas.
        for srev in pycompat.xrange(rev + 1, len(self)):
            if rev in self.parentrevs(srev):
                # Immediate children of censored node must be re-added as
                # fulltext.
                try:
                    revdata = self.revision(srev)
                except error.CensoredNodeError as e:
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = self.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen

        idxread.close()
        idxwrite.close()
        if dataread is not idxread:
            dataread.close()
            datawrite.close()

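    # Sketch of the size invariant censorrevision() relies on (illustrative,
    # with ``rl`` and ``tombstone`` as assumptions): the packed tombstone plus
    # space padding must occupy exactly rawsize(rev) bytes so that the data
    # offsets of every later revision stay valid.
    #
    #     packed = storageutil.packmeta({b'censored': tombstone}, b'')
    #     pad = b' ' * (rl.rawsize(rev) - len(packed))
    #     assert len(packed + pad) == rl.rawsize(rev)
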
2573 def verifyintegrity(self, state):
2573 def verifyintegrity(self, state):
2574 """Verifies the integrity of the revlog.
2574 """Verifies the integrity of the revlog.
2575
2575
2576 Yields ``revlogproblem`` instances describing problems that are
2576 Yields ``revlogproblem`` instances describing problems that are
2577 found.
2577 found.
2578 """
2578 """
2579 dd, di = self.checksize()
2579 dd, di = self.checksize()
2580 if dd:
2580 if dd:
2581 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2581 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2582 if di:
2582 if di:
2583 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2583 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2584
2584
2585 version = self.version & 0xFFFF
2585 version = self.version & 0xFFFF
2586
2586
2587 # The verifier tells us what version revlog we should be.
2587 # The verifier tells us what version revlog we should be.
2588 if version != state['expectedversion']:
2588 if version != state['expectedversion']:
2589 yield revlogproblem(
2589 yield revlogproblem(
2590 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2590 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2591 (self.indexfile, version, state['expectedversion']))
2591 (self.indexfile, version, state['expectedversion']))
2592
2592
2593 state['skipread'] = set()
2593 state['skipread'] = set()
2594
2594
2595 for rev in self:
2595 for rev in self:
2596 node = self.node(rev)
2596 node = self.node(rev)
2597
2597
            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            # ------------------------------------------------------
            # flags()               | 0      | 0      | 0     | not 0
            # renamed()             | False  | True   | False | ?
            # rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
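            #
            # As a concrete (made-up) illustration of the "rename" case, the
            # stored rawtext wraps the actual content in the filelog
            # metadata header; the path and nodeid here are invented:
            #
            #   b'\x01\ncopy: dir/oldname\ncopyrev: <40 hex digits>\n\x01\ncontent'
            #
            # while the "common" case stores just b'content'.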

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)
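                # Only flags the caller asked to skip *and* that are
                # actually set on this revision remain; e.g. a verifier
                # might pass REVIDX_EXTSTORED here so that externally
                # stored revisions are not hash-checked.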

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

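        # 'trackedsize' is the cumulative fulltext size of every revision
        # (rawsize), while 'storedsize' below is the on-disk footprint of
        # the revlog files, typically much smaller thanks to delta chains
        # and compression.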
        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
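
        # A minimal usage sketch (``rl`` is a hypothetical open revlog;
        # only the keys that were requested end up in the returned dict):
        #
        #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
        #   info['revisionscount']   # == len(rl)
        #   info['trackedsize']      # sum of rawsize() over all revisions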