revlog: optimize ancestors() to not check filtered revisions for each...
Yuya Nishihara
r40188:adbf8ca2 default
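This changeset touches two places in mercurial/revlog.py: it adds an '_uncheckedparentrevs' alias next to 'parentrevs()', and it rewrites 'ancestors()' to validate its start revisions once up front and then walk the graph with the unchecked parent lookup. The point, per the commit message, is to stop checking filtered revisions for each visited revision: a checked 'parentrevs()' pays a filtered-revision test on every call, and a lazy ancestor walk makes one such call per revision it visits, while an ancestor of an unfiltered revision can be assumed unfiltered, so testing only the start revisions is enough. Below is a toy sketch of that pattern; 'DAG' and 'FilteredDAG' are hypothetical stand-ins for revlog and a filtered changelog, not Mercurial's real classes.

# Toy model of the pattern in this changeset (not Mercurial's actual API).
# The subclass adds a filtering test to parentrevs()/node(); the class-level
# alias _uncheckedparentrevs keeps a reference to the *unchecked* function,
# so a walk can validate its starting points once and skip the per-edge test.

class DAG(object):
    def __init__(self, parents):
        self._parents = parents              # rev -> (p1, p2); -1 means null

    def parentrevs(self, rev):
        return self._parents[rev]

    # fast parentrevs(rev) where rev isn't filtered: even in a subclass that
    # overrides parentrevs(), this alias still binds the unchecked function
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        return rev                           # stand-in for a real node lookup

    def ancestors(self, revs):
        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # ancestors of unfiltered revisions are assumed unfiltered, so the
        # walk itself uses the unchecked parent lookup
        seen = set()
        visit = list(revs)
        while visit:
            r = visit.pop()
            for p in self._uncheckedparentrevs(r):
                if p != -1 and p not in seen:
                    seen.add(p)
                    visit.append(p)
        return seen

class FilteredDAG(DAG):
    """Hides some revisions, the way a filtered changelog does."""
    def __init__(self, parents, filtered):
        super(FilteredDAG, self).__init__(parents)
        self._filtered = set(filtered)

    def _checkrev(self, rev):
        if rev in self._filtered:
            raise LookupError('filtered revision %d' % rev)

    def parentrevs(self, rev):               # checked variant: a test per call
        self._checkrev(rev)
        return DAG.parentrevs(self, rev)

    def node(self, rev):
        self._checkrev(rev)
        return DAG.node(self, rev)

With this layout, FilteredDAG(...).ancestors([head]) still raises if a start revision is filtered, but the traversal never repeats the membership test. Before the change, the equivalent revlog code handed the checked 'parentrevs' to 'ancestor.lazyancestors', so the test ran once per visited revision.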
@@ -1,2535 +1,2544 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import os
19 import os
20 import struct
20 import struct
21 import zlib
21 import zlib
22
22
23 # import stuff from node for others to import from revlog
23 # import stuff from node for others to import from revlog
24 from .node import (
24 from .node import (
25 bin,
25 bin,
26 hex,
26 hex,
27 nullhex,
27 nullhex,
28 nullid,
28 nullid,
29 nullrev,
29 nullrev,
30 short,
30 short,
31 wdirfilenodeids,
31 wdirfilenodeids,
32 wdirhex,
32 wdirhex,
33 wdirid,
33 wdirid,
34 wdirrev,
34 wdirrev,
35 )
35 )
36 from .i18n import _
36 from .i18n import _
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 REVIDX_DEFAULT_FLAGS,
40 REVIDX_DEFAULT_FLAGS,
41 REVIDX_ELLIPSIS,
41 REVIDX_ELLIPSIS,
42 REVIDX_EXTSTORED,
42 REVIDX_EXTSTORED,
43 REVIDX_FLAGS_ORDER,
43 REVIDX_FLAGS_ORDER,
44 REVIDX_ISCENSORED,
44 REVIDX_ISCENSORED,
45 REVIDX_KNOWN_FLAGS,
45 REVIDX_KNOWN_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 )
55 )
56 from .thirdparty import (
56 from .thirdparty import (
57 attr,
57 attr,
58 )
58 )
59 from . import (
59 from . import (
60 ancestor,
60 ancestor,
61 dagop,
61 dagop,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 storageutil,
75 storageutil,
76 stringutil,
76 stringutil,
77 )
77 )
78
78
79 # blanked usage of all the name to prevent pyflakes constraints
79 # blanked usage of all the name to prevent pyflakes constraints
80 # We need these name available in the module for extensions.
80 # We need these name available in the module for extensions.
81 REVLOGV0
81 REVLOGV0
82 REVLOGV1
82 REVLOGV1
83 REVLOGV2
83 REVLOGV2
84 FLAG_INLINE_DATA
84 FLAG_INLINE_DATA
85 FLAG_GENERALDELTA
85 FLAG_GENERALDELTA
86 REVLOG_DEFAULT_FLAGS
86 REVLOG_DEFAULT_FLAGS
87 REVLOG_DEFAULT_FORMAT
87 REVLOG_DEFAULT_FORMAT
88 REVLOG_DEFAULT_VERSION
88 REVLOG_DEFAULT_VERSION
89 REVLOGV1_FLAGS
89 REVLOGV1_FLAGS
90 REVLOGV2_FLAGS
90 REVLOGV2_FLAGS
91 REVIDX_ISCENSORED
91 REVIDX_ISCENSORED
92 REVIDX_ELLIPSIS
92 REVIDX_ELLIPSIS
93 REVIDX_EXTSTORED
93 REVIDX_EXTSTORED
94 REVIDX_DEFAULT_FLAGS
94 REVIDX_DEFAULT_FLAGS
95 REVIDX_FLAGS_ORDER
95 REVIDX_FLAGS_ORDER
96 REVIDX_KNOWN_FLAGS
96 REVIDX_KNOWN_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
98
98
99 parsers = policy.importmod(r'parsers')
99 parsers = policy.importmod(r'parsers')
100
100
101 # Aliased for performance.
101 # Aliased for performance.
102 _zlibdecompress = zlib.decompress
102 _zlibdecompress = zlib.decompress
103
103
104 # max size of revlog with inline data
104 # max size of revlog with inline data
105 _maxinline = 131072
105 _maxinline = 131072
106 _chunksize = 1048576
106 _chunksize = 1048576
107
107
108 # Store flag processors (cf. 'addflagprocessor()' to register)
108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 _flagprocessors = {
109 _flagprocessors = {
110 REVIDX_ISCENSORED: None,
110 REVIDX_ISCENSORED: None,
111 }
111 }
112
112
113 # Flag processors for REVIDX_ELLIPSIS.
113 # Flag processors for REVIDX_ELLIPSIS.
114 def ellipsisreadprocessor(rl, text):
114 def ellipsisreadprocessor(rl, text):
115 return text, False
115 return text, False
116
116
117 def ellipsiswriteprocessor(rl, text):
117 def ellipsiswriteprocessor(rl, text):
118 return text, False
118 return text, False
119
119
120 def ellipsisrawprocessor(rl, text):
120 def ellipsisrawprocessor(rl, text):
121 return False
121 return False
122
122
123 ellipsisprocessor = (
123 ellipsisprocessor = (
124 ellipsisreadprocessor,
124 ellipsisreadprocessor,
125 ellipsiswriteprocessor,
125 ellipsiswriteprocessor,
126 ellipsisrawprocessor,
126 ellipsisrawprocessor,
127 )
127 )
128
128
129 def addflagprocessor(flag, processor):
129 def addflagprocessor(flag, processor):
130 """Register a flag processor on a revision data flag.
130 """Register a flag processor on a revision data flag.
131
131
132 Invariant:
132 Invariant:
133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 - Only one flag processor can be registered on a specific flag.
135 - Only one flag processor can be registered on a specific flag.
136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 following signatures:
137 following signatures:
138 - (read) f(self, rawtext) -> text, bool
138 - (read) f(self, rawtext) -> text, bool
139 - (write) f(self, text) -> rawtext, bool
139 - (write) f(self, text) -> rawtext, bool
140 - (raw) f(self, rawtext) -> bool
140 - (raw) f(self, rawtext) -> bool
141 "text" is presented to the user. "rawtext" is stored in revlog data, not
141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 directly visible to the user.
142 directly visible to the user.
143 The boolean returned by these transforms is used to determine whether
143 The boolean returned by these transforms is used to determine whether
144 the returned text can be used for hash integrity checking. For example,
144 the returned text can be used for hash integrity checking. For example,
145 if "write" returns False, then "text" is used to generate hash. If
145 if "write" returns False, then "text" is used to generate hash. If
146 "write" returns True, that basically means "rawtext" returned by "write"
146 "write" returns True, that basically means "rawtext" returned by "write"
147 should be used to generate hash. Usually, "write" and "read" return
147 should be used to generate hash. Usually, "write" and "read" return
148 different booleans. And "raw" returns a same boolean as "write".
148 different booleans. And "raw" returns a same boolean as "write".
149
149
150 Note: The 'raw' transform is used for changegroup generation and in some
150 Note: The 'raw' transform is used for changegroup generation and in some
151 debug commands. In this case the transform only indicates whether the
151 debug commands. In this case the transform only indicates whether the
152 contents can be used for hash integrity checks.
152 contents can be used for hash integrity checks.
153 """
153 """
154 if not flag & REVIDX_KNOWN_FLAGS:
154 if not flag & REVIDX_KNOWN_FLAGS:
155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
155 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
156 raise error.ProgrammingError(msg)
156 raise error.ProgrammingError(msg)
157 if flag not in REVIDX_FLAGS_ORDER:
157 if flag not in REVIDX_FLAGS_ORDER:
158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
158 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
159 raise error.ProgrammingError(msg)
159 raise error.ProgrammingError(msg)
160 if flag in _flagprocessors:
160 if flag in _flagprocessors:
161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
161 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
162 raise error.Abort(msg)
162 raise error.Abort(msg)
163 _flagprocessors[flag] = processor
163 _flagprocessors[flag] = processor
164
164
165 def getoffset(q):
165 def getoffset(q):
166 return int(q >> 16)
166 return int(q >> 16)
167
167
168 def gettype(q):
168 def gettype(q):
169 return int(q & 0xFFFF)
169 return int(q & 0xFFFF)
170
170
171 def offset_type(offset, type):
171 def offset_type(offset, type):
172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
172 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
173 raise ValueError('unknown revlog index flags')
173 raise ValueError('unknown revlog index flags')
174 return int(int(offset) << 16 | type)
174 return int(int(offset) << 16 | type)
175
175
176 @attr.s(slots=True, frozen=True)
176 @attr.s(slots=True, frozen=True)
177 class _revisioninfo(object):
177 class _revisioninfo(object):
178 """Information about a revision that allows building its fulltext
178 """Information about a revision that allows building its fulltext
179 node: expected hash of the revision
179 node: expected hash of the revision
180 p1, p2: parent revs of the revision
180 p1, p2: parent revs of the revision
181 btext: built text cache consisting of a one-element list
181 btext: built text cache consisting of a one-element list
182 cachedelta: (baserev, uncompressed_delta) or None
182 cachedelta: (baserev, uncompressed_delta) or None
183 flags: flags associated to the revision storage
183 flags: flags associated to the revision storage
184
184
185 One of btext[0] or cachedelta must be set.
185 One of btext[0] or cachedelta must be set.
186 """
186 """
187 node = attr.ib()
187 node = attr.ib()
188 p1 = attr.ib()
188 p1 = attr.ib()
189 p2 = attr.ib()
189 p2 = attr.ib()
190 btext = attr.ib()
190 btext = attr.ib()
191 textlen = attr.ib()
191 textlen = attr.ib()
192 cachedelta = attr.ib()
192 cachedelta = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194
194
195 @interfaceutil.implementer(repository.irevisiondelta)
195 @interfaceutil.implementer(repository.irevisiondelta)
196 @attr.s(slots=True)
196 @attr.s(slots=True)
197 class revlogrevisiondelta(object):
197 class revlogrevisiondelta(object):
198 node = attr.ib()
198 node = attr.ib()
199 p1node = attr.ib()
199 p1node = attr.ib()
200 p2node = attr.ib()
200 p2node = attr.ib()
201 basenode = attr.ib()
201 basenode = attr.ib()
202 flags = attr.ib()
202 flags = attr.ib()
203 baserevisionsize = attr.ib()
203 baserevisionsize = attr.ib()
204 revision = attr.ib()
204 revision = attr.ib()
205 delta = attr.ib()
205 delta = attr.ib()
206 linknode = attr.ib(default=None)
206 linknode = attr.ib(default=None)
207
207
208 @interfaceutil.implementer(repository.iverifyproblem)
208 @interfaceutil.implementer(repository.iverifyproblem)
209 @attr.s(frozen=True)
209 @attr.s(frozen=True)
210 class revlogproblem(object):
210 class revlogproblem(object):
211 warning = attr.ib(default=None)
211 warning = attr.ib(default=None)
212 error = attr.ib(default=None)
212 error = attr.ib(default=None)
213 node = attr.ib(default=None)
213 node = attr.ib(default=None)
214
214
215 # index v0:
215 # index v0:
216 # 4 bytes: offset
216 # 4 bytes: offset
217 # 4 bytes: compressed length
217 # 4 bytes: compressed length
218 # 4 bytes: base rev
218 # 4 bytes: base rev
219 # 4 bytes: link rev
219 # 4 bytes: link rev
220 # 20 bytes: parent 1 nodeid
220 # 20 bytes: parent 1 nodeid
221 # 20 bytes: parent 2 nodeid
221 # 20 bytes: parent 2 nodeid
222 # 20 bytes: nodeid
222 # 20 bytes: nodeid
223 indexformatv0 = struct.Struct(">4l20s20s20s")
223 indexformatv0 = struct.Struct(">4l20s20s20s")
224 indexformatv0_pack = indexformatv0.pack
224 indexformatv0_pack = indexformatv0.pack
225 indexformatv0_unpack = indexformatv0.unpack
225 indexformatv0_unpack = indexformatv0.unpack
226
226
227 class revlogoldindex(list):
227 class revlogoldindex(list):
228 def __getitem__(self, i):
228 def __getitem__(self, i):
229 if i == -1:
229 if i == -1:
230 return (0, 0, 0, -1, -1, -1, -1, nullid)
230 return (0, 0, 0, -1, -1, -1, -1, nullid)
231 return list.__getitem__(self, i)
231 return list.__getitem__(self, i)
232
232
233 class revlogoldio(object):
233 class revlogoldio(object):
234 def __init__(self):
234 def __init__(self):
235 self.size = indexformatv0.size
235 self.size = indexformatv0.size
236
236
237 def parseindex(self, data, inline):
237 def parseindex(self, data, inline):
238 s = self.size
238 s = self.size
239 index = []
239 index = []
240 nodemap = {nullid: nullrev}
240 nodemap = {nullid: nullrev}
241 n = off = 0
241 n = off = 0
242 l = len(data)
242 l = len(data)
243 while off + s <= l:
243 while off + s <= l:
244 cur = data[off:off + s]
244 cur = data[off:off + s]
245 off += s
245 off += s
246 e = indexformatv0_unpack(cur)
246 e = indexformatv0_unpack(cur)
247 # transform to revlogv1 format
247 # transform to revlogv1 format
248 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
248 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
249 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
249 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
250 index.append(e2)
250 index.append(e2)
251 nodemap[e[6]] = n
251 nodemap[e[6]] = n
252 n += 1
252 n += 1
253
253
254 return revlogoldindex(index), nodemap, None
254 return revlogoldindex(index), nodemap, None
255
255
256 def packentry(self, entry, node, version, rev):
256 def packentry(self, entry, node, version, rev):
257 if gettype(entry[0]):
257 if gettype(entry[0]):
258 raise error.RevlogError(_('index entry flags need revlog '
258 raise error.RevlogError(_('index entry flags need revlog '
259 'version 1'))
259 'version 1'))
260 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
260 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
261 node(entry[5]), node(entry[6]), entry[7])
261 node(entry[5]), node(entry[6]), entry[7])
262 return indexformatv0_pack(*e2)
262 return indexformatv0_pack(*e2)
263
263
264 # index ng:
264 # index ng:
265 # 6 bytes: offset
265 # 6 bytes: offset
266 # 2 bytes: flags
266 # 2 bytes: flags
267 # 4 bytes: compressed length
267 # 4 bytes: compressed length
268 # 4 bytes: uncompressed length
268 # 4 bytes: uncompressed length
269 # 4 bytes: base rev
269 # 4 bytes: base rev
270 # 4 bytes: link rev
270 # 4 bytes: link rev
271 # 4 bytes: parent 1 rev
271 # 4 bytes: parent 1 rev
272 # 4 bytes: parent 2 rev
272 # 4 bytes: parent 2 rev
273 # 32 bytes: nodeid
273 # 32 bytes: nodeid
274 indexformatng = struct.Struct(">Qiiiiii20s12x")
274 indexformatng = struct.Struct(">Qiiiiii20s12x")
275 indexformatng_pack = indexformatng.pack
275 indexformatng_pack = indexformatng.pack
276 versionformat = struct.Struct(">I")
276 versionformat = struct.Struct(">I")
277 versionformat_pack = versionformat.pack
277 versionformat_pack = versionformat.pack
278 versionformat_unpack = versionformat.unpack
278 versionformat_unpack = versionformat.unpack
279
279
280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
281 # signed integer)
281 # signed integer)
282 _maxentrysize = 0x7fffffff
282 _maxentrysize = 0x7fffffff
283
283
284 class revlogio(object):
284 class revlogio(object):
285 def __init__(self):
285 def __init__(self):
286 self.size = indexformatng.size
286 self.size = indexformatng.size
287
287
288 def parseindex(self, data, inline):
288 def parseindex(self, data, inline):
289 # call the C implementation to parse the index data
289 # call the C implementation to parse the index data
290 index, cache = parsers.parse_index2(data, inline)
290 index, cache = parsers.parse_index2(data, inline)
291 return index, getattr(index, 'nodemap', None), cache
291 return index, getattr(index, 'nodemap', None), cache
292
292
293 def packentry(self, entry, node, version, rev):
293 def packentry(self, entry, node, version, rev):
294 p = indexformatng_pack(*entry)
294 p = indexformatng_pack(*entry)
295 if rev == 0:
295 if rev == 0:
296 p = versionformat_pack(version) + p[4:]
296 p = versionformat_pack(version) + p[4:]
297 return p
297 return p
298
298
299 class revlog(object):
299 class revlog(object):
300 """
300 """
301 the underlying revision storage object
301 the underlying revision storage object
302
302
303 A revlog consists of two parts, an index and the revision data.
303 A revlog consists of two parts, an index and the revision data.
304
304
305 The index is a file with a fixed record size containing
305 The index is a file with a fixed record size containing
306 information on each revision, including its nodeid (hash), the
306 information on each revision, including its nodeid (hash), the
307 nodeids of its parents, the position and offset of its data within
307 nodeids of its parents, the position and offset of its data within
308 the data file, and the revision it's based on. Finally, each entry
308 the data file, and the revision it's based on. Finally, each entry
309 contains a linkrev entry that can serve as a pointer to external
309 contains a linkrev entry that can serve as a pointer to external
310 data.
310 data.
311
311
312 The revision data itself is a linear collection of data chunks.
312 The revision data itself is a linear collection of data chunks.
313 Each chunk represents a revision and is usually represented as a
313 Each chunk represents a revision and is usually represented as a
314 delta against the previous chunk. To bound lookup time, runs of
314 delta against the previous chunk. To bound lookup time, runs of
315 deltas are limited to about 2 times the length of the original
315 deltas are limited to about 2 times the length of the original
316 version data. This makes retrieval of a version proportional to
316 version data. This makes retrieval of a version proportional to
317 its size, or O(1) relative to the number of revisions.
317 its size, or O(1) relative to the number of revisions.
318
318
319 Both pieces of the revlog are written to in an append-only
319 Both pieces of the revlog are written to in an append-only
320 fashion, which means we never need to rewrite a file to insert or
320 fashion, which means we never need to rewrite a file to insert or
321 remove data, and can use some simple techniques to avoid the need
321 remove data, and can use some simple techniques to avoid the need
322 for locking while reading.
322 for locking while reading.
323
323
324 If checkambig, indexfile is opened with checkambig=True at
324 If checkambig, indexfile is opened with checkambig=True at
325 writing, to avoid file stat ambiguity.
325 writing, to avoid file stat ambiguity.
326
326
327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
327 If mmaplargeindex is True, and an mmapindexthreshold is set, the
328 index will be mmapped rather than read if it is larger than the
328 index will be mmapped rather than read if it is larger than the
329 configured threshold.
329 configured threshold.
330
330
331 If censorable is True, the revlog can have censored revisions.
331 If censorable is True, the revlog can have censored revisions.
332 """
332 """
333 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
333 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
334 mmaplargeindex=False, censorable=False):
334 mmaplargeindex=False, censorable=False):
335 """
335 """
336 create a revlog object
336 create a revlog object
337
337
338 opener is a function that abstracts the file opening operation
338 opener is a function that abstracts the file opening operation
339 and can be used to implement COW semantics or the like.
339 and can be used to implement COW semantics or the like.
340 """
340 """
341 self.indexfile = indexfile
341 self.indexfile = indexfile
342 self.datafile = datafile or (indexfile[:-2] + ".d")
342 self.datafile = datafile or (indexfile[:-2] + ".d")
343 self.opener = opener
343 self.opener = opener
344 # When True, indexfile is opened with checkambig=True at writing, to
344 # When True, indexfile is opened with checkambig=True at writing, to
345 # avoid file stat ambiguity.
345 # avoid file stat ambiguity.
346 self._checkambig = checkambig
346 self._checkambig = checkambig
347 self._censorable = censorable
347 self._censorable = censorable
348 # 3-tuple of (node, rev, text) for a raw revision.
348 # 3-tuple of (node, rev, text) for a raw revision.
349 self._revisioncache = None
349 self._revisioncache = None
350 # Maps rev to chain base rev.
350 # Maps rev to chain base rev.
351 self._chainbasecache = util.lrucachedict(100)
351 self._chainbasecache = util.lrucachedict(100)
352 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
352 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
353 self._chunkcache = (0, '')
353 self._chunkcache = (0, '')
354 # How much data to read and cache into the raw revlog data cache.
354 # How much data to read and cache into the raw revlog data cache.
355 self._chunkcachesize = 65536
355 self._chunkcachesize = 65536
356 self._maxchainlen = None
356 self._maxchainlen = None
357 self._deltabothparents = True
357 self._deltabothparents = True
358 self.index = []
358 self.index = []
359 # Mapping of partial identifiers to full nodes.
359 # Mapping of partial identifiers to full nodes.
360 self._pcache = {}
360 self._pcache = {}
361 # Mapping of revision integer to full node.
361 # Mapping of revision integer to full node.
362 self._nodecache = {nullid: nullrev}
362 self._nodecache = {nullid: nullrev}
363 self._nodepos = None
363 self._nodepos = None
364 self._compengine = 'zlib'
364 self._compengine = 'zlib'
365 self._maxdeltachainspan = -1
365 self._maxdeltachainspan = -1
366 self._withsparseread = False
366 self._withsparseread = False
367 self._sparserevlog = False
367 self._sparserevlog = False
368 self._srdensitythreshold = 0.50
368 self._srdensitythreshold = 0.50
369 self._srmingapsize = 262144
369 self._srmingapsize = 262144
370
370
371 # Make copy of flag processors so each revlog instance can support
371 # Make copy of flag processors so each revlog instance can support
372 # custom flags.
372 # custom flags.
373 self._flagprocessors = dict(_flagprocessors)
373 self._flagprocessors = dict(_flagprocessors)
374
374
375 mmapindexthreshold = None
375 mmapindexthreshold = None
376 v = REVLOG_DEFAULT_VERSION
376 v = REVLOG_DEFAULT_VERSION
377 opts = getattr(opener, 'options', None)
377 opts = getattr(opener, 'options', None)
378 if opts is not None:
378 if opts is not None:
379 if 'revlogv2' in opts:
379 if 'revlogv2' in opts:
380 # version 2 revlogs always use generaldelta.
380 # version 2 revlogs always use generaldelta.
381 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
381 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
382 elif 'revlogv1' in opts:
382 elif 'revlogv1' in opts:
383 if 'generaldelta' in opts:
383 if 'generaldelta' in opts:
384 v |= FLAG_GENERALDELTA
384 v |= FLAG_GENERALDELTA
385 else:
385 else:
386 v = 0
386 v = 0
387 if 'chunkcachesize' in opts:
387 if 'chunkcachesize' in opts:
388 self._chunkcachesize = opts['chunkcachesize']
388 self._chunkcachesize = opts['chunkcachesize']
389 if 'maxchainlen' in opts:
389 if 'maxchainlen' in opts:
390 self._maxchainlen = opts['maxchainlen']
390 self._maxchainlen = opts['maxchainlen']
391 if 'deltabothparents' in opts:
391 if 'deltabothparents' in opts:
392 self._deltabothparents = opts['deltabothparents']
392 self._deltabothparents = opts['deltabothparents']
393 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
393 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
394 if 'compengine' in opts:
394 if 'compengine' in opts:
395 self._compengine = opts['compengine']
395 self._compengine = opts['compengine']
396 if 'maxdeltachainspan' in opts:
396 if 'maxdeltachainspan' in opts:
397 self._maxdeltachainspan = opts['maxdeltachainspan']
397 self._maxdeltachainspan = opts['maxdeltachainspan']
398 if mmaplargeindex and 'mmapindexthreshold' in opts:
398 if mmaplargeindex and 'mmapindexthreshold' in opts:
399 mmapindexthreshold = opts['mmapindexthreshold']
399 mmapindexthreshold = opts['mmapindexthreshold']
400 self._sparserevlog = bool(opts.get('sparse-revlog', False))
400 self._sparserevlog = bool(opts.get('sparse-revlog', False))
401 withsparseread = bool(opts.get('with-sparse-read', False))
401 withsparseread = bool(opts.get('with-sparse-read', False))
402 # sparse-revlog forces sparse-read
402 # sparse-revlog forces sparse-read
403 self._withsparseread = self._sparserevlog or withsparseread
403 self._withsparseread = self._sparserevlog or withsparseread
404 if 'sparse-read-density-threshold' in opts:
404 if 'sparse-read-density-threshold' in opts:
405 self._srdensitythreshold = opts['sparse-read-density-threshold']
405 self._srdensitythreshold = opts['sparse-read-density-threshold']
406 if 'sparse-read-min-gap-size' in opts:
406 if 'sparse-read-min-gap-size' in opts:
407 self._srmingapsize = opts['sparse-read-min-gap-size']
407 self._srmingapsize = opts['sparse-read-min-gap-size']
408 if opts.get('enableellipsis'):
408 if opts.get('enableellipsis'):
409 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
409 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
410
410
411 if self._chunkcachesize <= 0:
411 if self._chunkcachesize <= 0:
412 raise error.RevlogError(_('revlog chunk cache size %r is not '
412 raise error.RevlogError(_('revlog chunk cache size %r is not '
413 'greater than 0') % self._chunkcachesize)
413 'greater than 0') % self._chunkcachesize)
414 elif self._chunkcachesize & (self._chunkcachesize - 1):
414 elif self._chunkcachesize & (self._chunkcachesize - 1):
415 raise error.RevlogError(_('revlog chunk cache size %r is not a '
415 raise error.RevlogError(_('revlog chunk cache size %r is not a '
416 'power of 2') % self._chunkcachesize)
416 'power of 2') % self._chunkcachesize)
417
417
418 self._loadindex(v, mmapindexthreshold)
418 self._loadindex(v, mmapindexthreshold)
419
419
420 def _loadindex(self, v, mmapindexthreshold):
420 def _loadindex(self, v, mmapindexthreshold):
421 indexdata = ''
421 indexdata = ''
422 self._initempty = True
422 self._initempty = True
423 try:
423 try:
424 with self._indexfp() as f:
424 with self._indexfp() as f:
425 if (mmapindexthreshold is not None and
425 if (mmapindexthreshold is not None and
426 self.opener.fstat(f).st_size >= mmapindexthreshold):
426 self.opener.fstat(f).st_size >= mmapindexthreshold):
427 indexdata = util.buffer(util.mmapread(f))
427 indexdata = util.buffer(util.mmapread(f))
428 else:
428 else:
429 indexdata = f.read()
429 indexdata = f.read()
430 if len(indexdata) > 0:
430 if len(indexdata) > 0:
431 v = versionformat_unpack(indexdata[:4])[0]
431 v = versionformat_unpack(indexdata[:4])[0]
432 self._initempty = False
432 self._initempty = False
433 except IOError as inst:
433 except IOError as inst:
434 if inst.errno != errno.ENOENT:
434 if inst.errno != errno.ENOENT:
435 raise
435 raise
436
436
437 self.version = v
437 self.version = v
438 self._inline = v & FLAG_INLINE_DATA
438 self._inline = v & FLAG_INLINE_DATA
439 self._generaldelta = v & FLAG_GENERALDELTA
439 self._generaldelta = v & FLAG_GENERALDELTA
440 flags = v & ~0xFFFF
440 flags = v & ~0xFFFF
441 fmt = v & 0xFFFF
441 fmt = v & 0xFFFF
442 if fmt == REVLOGV0:
442 if fmt == REVLOGV0:
443 if flags:
443 if flags:
444 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
444 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
445 'revlog %s') %
445 'revlog %s') %
446 (flags >> 16, fmt, self.indexfile))
446 (flags >> 16, fmt, self.indexfile))
447 elif fmt == REVLOGV1:
447 elif fmt == REVLOGV1:
448 if flags & ~REVLOGV1_FLAGS:
448 if flags & ~REVLOGV1_FLAGS:
449 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
449 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
450 'revlog %s') %
450 'revlog %s') %
451 (flags >> 16, fmt, self.indexfile))
451 (flags >> 16, fmt, self.indexfile))
452 elif fmt == REVLOGV2:
452 elif fmt == REVLOGV2:
453 if flags & ~REVLOGV2_FLAGS:
453 if flags & ~REVLOGV2_FLAGS:
454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
454 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
455 'revlog %s') %
455 'revlog %s') %
456 (flags >> 16, fmt, self.indexfile))
456 (flags >> 16, fmt, self.indexfile))
457 else:
457 else:
458 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
458 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
459 (fmt, self.indexfile))
459 (fmt, self.indexfile))
460
460
461 self._storedeltachains = True
461 self._storedeltachains = True
462
462
463 self._io = revlogio()
463 self._io = revlogio()
464 if self.version == REVLOGV0:
464 if self.version == REVLOGV0:
465 self._io = revlogoldio()
465 self._io = revlogoldio()
466 try:
466 try:
467 d = self._io.parseindex(indexdata, self._inline)
467 d = self._io.parseindex(indexdata, self._inline)
468 except (ValueError, IndexError):
468 except (ValueError, IndexError):
469 raise error.RevlogError(_("index %s is corrupted") %
469 raise error.RevlogError(_("index %s is corrupted") %
470 self.indexfile)
470 self.indexfile)
471 self.index, nodemap, self._chunkcache = d
471 self.index, nodemap, self._chunkcache = d
472 if nodemap is not None:
472 if nodemap is not None:
473 self.nodemap = self._nodecache = nodemap
473 self.nodemap = self._nodecache = nodemap
474 if not self._chunkcache:
474 if not self._chunkcache:
475 self._chunkclear()
475 self._chunkclear()
476 # revnum -> (chain-length, sum-delta-length)
476 # revnum -> (chain-length, sum-delta-length)
477 self._chaininfocache = {}
477 self._chaininfocache = {}
478 # revlog header -> revlog compressor
478 # revlog header -> revlog compressor
479 self._decompressors = {}
479 self._decompressors = {}
480
480
481 @util.propertycache
481 @util.propertycache
482 def _compressor(self):
482 def _compressor(self):
483 return util.compengines[self._compengine].revlogcompressor()
483 return util.compengines[self._compengine].revlogcompressor()
484
484
485 def _indexfp(self, mode='r'):
485 def _indexfp(self, mode='r'):
486 """file object for the revlog's index file"""
486 """file object for the revlog's index file"""
487 args = {r'mode': mode}
487 args = {r'mode': mode}
488 if mode != 'r':
488 if mode != 'r':
489 args[r'checkambig'] = self._checkambig
489 args[r'checkambig'] = self._checkambig
490 if mode == 'w':
490 if mode == 'w':
491 args[r'atomictemp'] = True
491 args[r'atomictemp'] = True
492 return self.opener(self.indexfile, **args)
492 return self.opener(self.indexfile, **args)
493
493
494 def _datafp(self, mode='r'):
494 def _datafp(self, mode='r'):
495 """file object for the revlog's data file"""
495 """file object for the revlog's data file"""
496 return self.opener(self.datafile, mode=mode)
496 return self.opener(self.datafile, mode=mode)
497
497
498 @contextlib.contextmanager
498 @contextlib.contextmanager
499 def _datareadfp(self, existingfp=None):
499 def _datareadfp(self, existingfp=None):
500 """file object suitable to read data"""
500 """file object suitable to read data"""
501 if existingfp is not None:
501 if existingfp is not None:
502 yield existingfp
502 yield existingfp
503 else:
503 else:
504 if self._inline:
504 if self._inline:
505 func = self._indexfp
505 func = self._indexfp
506 else:
506 else:
507 func = self._datafp
507 func = self._datafp
508 with func() as fp:
508 with func() as fp:
509 yield fp
509 yield fp
510
510
511 def tip(self):
511 def tip(self):
512 return self.node(len(self.index) - 1)
512 return self.node(len(self.index) - 1)
513 def __contains__(self, rev):
513 def __contains__(self, rev):
514 return 0 <= rev < len(self)
514 return 0 <= rev < len(self)
515 def __len__(self):
515 def __len__(self):
516 return len(self.index)
516 return len(self.index)
517 def __iter__(self):
517 def __iter__(self):
518 return iter(pycompat.xrange(len(self)))
518 return iter(pycompat.xrange(len(self)))
519 def revs(self, start=0, stop=None):
519 def revs(self, start=0, stop=None):
520 """iterate over all rev in this revlog (from start to stop)"""
520 """iterate over all rev in this revlog (from start to stop)"""
521 return storageutil.iterrevs(len(self), start=start, stop=stop)
521 return storageutil.iterrevs(len(self), start=start, stop=stop)
522
522
523 @util.propertycache
523 @util.propertycache
524 def nodemap(self):
524 def nodemap(self):
525 if self.index:
525 if self.index:
526 # populate mapping down to the initial node
526 # populate mapping down to the initial node
527 node0 = self.index[0][7] # get around changelog filtering
527 node0 = self.index[0][7] # get around changelog filtering
528 self.rev(node0)
528 self.rev(node0)
529 return self._nodecache
529 return self._nodecache
530
530
531 def hasnode(self, node):
531 def hasnode(self, node):
532 try:
532 try:
533 self.rev(node)
533 self.rev(node)
534 return True
534 return True
535 except KeyError:
535 except KeyError:
536 return False
536 return False
537
537
538 def candelta(self, baserev, rev):
538 def candelta(self, baserev, rev):
539 """whether two revisions (baserev, rev) can be delta-ed or not"""
539 """whether two revisions (baserev, rev) can be delta-ed or not"""
540 # Disable delta if either rev requires a content-changing flag
540 # Disable delta if either rev requires a content-changing flag
541 # processor (ex. LFS). This is because such flag processor can alter
541 # processor (ex. LFS). This is because such flag processor can alter
542 # the rawtext content that the delta will be based on, and two clients
542 # the rawtext content that the delta will be based on, and two clients
543 # could have a same revlog node with different flags (i.e. different
543 # could have a same revlog node with different flags (i.e. different
544 # rawtext contents) and the delta could be incompatible.
544 # rawtext contents) and the delta could be incompatible.
545 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
545 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
546 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
546 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
547 return False
547 return False
548 return True
548 return True
549
549
550 def clearcaches(self):
550 def clearcaches(self):
551 self._revisioncache = None
551 self._revisioncache = None
552 self._chainbasecache.clear()
552 self._chainbasecache.clear()
553 self._chunkcache = (0, '')
553 self._chunkcache = (0, '')
554 self._pcache = {}
554 self._pcache = {}
555
555
556 try:
556 try:
557 self._nodecache.clearcaches()
557 self._nodecache.clearcaches()
558 except AttributeError:
558 except AttributeError:
559 self._nodecache = {nullid: nullrev}
559 self._nodecache = {nullid: nullrev}
560 self._nodepos = None
560 self._nodepos = None
561
561
562 def rev(self, node):
562 def rev(self, node):
563 try:
563 try:
564 return self._nodecache[node]
564 return self._nodecache[node]
565 except TypeError:
565 except TypeError:
566 raise
566 raise
567 except error.RevlogError:
567 except error.RevlogError:
568 # parsers.c radix tree lookup failed
568 # parsers.c radix tree lookup failed
569 if node == wdirid or node in wdirfilenodeids:
569 if node == wdirid or node in wdirfilenodeids:
570 raise error.WdirUnsupported
570 raise error.WdirUnsupported
571 raise error.LookupError(node, self.indexfile, _('no node'))
571 raise error.LookupError(node, self.indexfile, _('no node'))
572 except KeyError:
572 except KeyError:
573 # pure python cache lookup failed
573 # pure python cache lookup failed
574 n = self._nodecache
574 n = self._nodecache
575 i = self.index
575 i = self.index
576 p = self._nodepos
576 p = self._nodepos
577 if p is None:
577 if p is None:
578 p = len(i) - 1
578 p = len(i) - 1
579 else:
579 else:
580 assert p < len(i)
580 assert p < len(i)
581 for r in pycompat.xrange(p, -1, -1):
581 for r in pycompat.xrange(p, -1, -1):
582 v = i[r][7]
582 v = i[r][7]
583 n[v] = r
583 n[v] = r
584 if v == node:
584 if v == node:
585 self._nodepos = r - 1
585 self._nodepos = r - 1
586 return r
586 return r
587 if node == wdirid or node in wdirfilenodeids:
587 if node == wdirid or node in wdirfilenodeids:
588 raise error.WdirUnsupported
588 raise error.WdirUnsupported
589 raise error.LookupError(node, self.indexfile, _('no node'))
589 raise error.LookupError(node, self.indexfile, _('no node'))
590
590
591 # Accessors for index entries.
591 # Accessors for index entries.
592
592
593 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
593 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
594 # are flags.
594 # are flags.
595 def start(self, rev):
595 def start(self, rev):
596 return int(self.index[rev][0] >> 16)
596 return int(self.index[rev][0] >> 16)
597
597
598 def flags(self, rev):
598 def flags(self, rev):
599 return self.index[rev][0] & 0xFFFF
599 return self.index[rev][0] & 0xFFFF
600
600
601 def length(self, rev):
601 def length(self, rev):
602 return self.index[rev][1]
602 return self.index[rev][1]
603
603
604 def rawsize(self, rev):
604 def rawsize(self, rev):
605 """return the length of the uncompressed text for a given revision"""
605 """return the length of the uncompressed text for a given revision"""
606 l = self.index[rev][2]
606 l = self.index[rev][2]
607 if l >= 0:
607 if l >= 0:
608 return l
608 return l
609
609
610 t = self.revision(rev, raw=True)
610 t = self.revision(rev, raw=True)
611 return len(t)
611 return len(t)
612
612
613 def size(self, rev):
613 def size(self, rev):
614 """length of non-raw text (processed by a "read" flag processor)"""
614 """length of non-raw text (processed by a "read" flag processor)"""
615 # fast path: if no "read" flag processor could change the content,
615 # fast path: if no "read" flag processor could change the content,
616 # size is rawsize. note: ELLIPSIS is known to not change the content.
616 # size is rawsize. note: ELLIPSIS is known to not change the content.
617 flags = self.flags(rev)
617 flags = self.flags(rev)
618 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
618 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
619 return self.rawsize(rev)
619 return self.rawsize(rev)
620
620
621 return len(self.revision(rev, raw=False))
621 return len(self.revision(rev, raw=False))
622
622
623 def chainbase(self, rev):
623 def chainbase(self, rev):
624 base = self._chainbasecache.get(rev)
624 base = self._chainbasecache.get(rev)
625 if base is not None:
625 if base is not None:
626 return base
626 return base
627
627
628 index = self.index
628 index = self.index
629 iterrev = rev
629 iterrev = rev
630 base = index[iterrev][3]
630 base = index[iterrev][3]
631 while base != iterrev:
631 while base != iterrev:
632 iterrev = base
632 iterrev = base
633 base = index[iterrev][3]
633 base = index[iterrev][3]
634
634
635 self._chainbasecache[rev] = base
635 self._chainbasecache[rev] = base
636 return base
636 return base
637
637
638 def linkrev(self, rev):
638 def linkrev(self, rev):
639 return self.index[rev][4]
639 return self.index[rev][4]
640
640
641 def parentrevs(self, rev):
641 def parentrevs(self, rev):
642 try:
642 try:
643 entry = self.index[rev]
643 entry = self.index[rev]
644 except IndexError:
644 except IndexError:
645 if rev == wdirrev:
645 if rev == wdirrev:
646 raise error.WdirUnsupported
646 raise error.WdirUnsupported
647 raise
647 raise
648
648
649 return entry[5], entry[6]
649 return entry[5], entry[6]
650
650
651 # fast parentrevs(rev) where rev isn't filtered
652 _uncheckedparentrevs = parentrevs
653
651 def node(self, rev):
654 def node(self, rev):
652 try:
655 try:
653 return self.index[rev][7]
656 return self.index[rev][7]
654 except IndexError:
657 except IndexError:
655 if rev == wdirrev:
658 if rev == wdirrev:
656 raise error.WdirUnsupported
659 raise error.WdirUnsupported
657 raise
660 raise
658
661
659 # Derived from index values.
662 # Derived from index values.
660
663
661 def end(self, rev):
664 def end(self, rev):
662 return self.start(rev) + self.length(rev)
665 return self.start(rev) + self.length(rev)
663
666
664 def parents(self, node):
667 def parents(self, node):
665 i = self.index
668 i = self.index
666 d = i[self.rev(node)]
669 d = i[self.rev(node)]
667 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
670 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
668
671
669 def chainlen(self, rev):
672 def chainlen(self, rev):
670 return self._chaininfo(rev)[0]
673 return self._chaininfo(rev)[0]
671
674
672 def _chaininfo(self, rev):
675 def _chaininfo(self, rev):
673 chaininfocache = self._chaininfocache
676 chaininfocache = self._chaininfocache
674 if rev in chaininfocache:
677 if rev in chaininfocache:
675 return chaininfocache[rev]
678 return chaininfocache[rev]
676 index = self.index
679 index = self.index
677 generaldelta = self._generaldelta
680 generaldelta = self._generaldelta
678 iterrev = rev
681 iterrev = rev
679 e = index[iterrev]
682 e = index[iterrev]
680 clen = 0
683 clen = 0
681 compresseddeltalen = 0
684 compresseddeltalen = 0
682 while iterrev != e[3]:
685 while iterrev != e[3]:
683 clen += 1
686 clen += 1
684 compresseddeltalen += e[1]
687 compresseddeltalen += e[1]
685 if generaldelta:
688 if generaldelta:
686 iterrev = e[3]
689 iterrev = e[3]
687 else:
690 else:
688 iterrev -= 1
691 iterrev -= 1
689 if iterrev in chaininfocache:
692 if iterrev in chaininfocache:
690 t = chaininfocache[iterrev]
693 t = chaininfocache[iterrev]
691 clen += t[0]
694 clen += t[0]
692 compresseddeltalen += t[1]
695 compresseddeltalen += t[1]
693 break
696 break
694 e = index[iterrev]
697 e = index[iterrev]
695 else:
698 else:
696 # Add text length of base since decompressing that also takes
699 # Add text length of base since decompressing that also takes
697 # work. For cache hits the length is already included.
700 # work. For cache hits the length is already included.
698 compresseddeltalen += e[1]
701 compresseddeltalen += e[1]
699 r = (clen, compresseddeltalen)
702 r = (clen, compresseddeltalen)
700 chaininfocache[rev] = r
703 chaininfocache[rev] = r
701 return r
704 return r
702
705
703 def _deltachain(self, rev, stoprev=None):
706 def _deltachain(self, rev, stoprev=None):
704 """Obtain the delta chain for a revision.
707 """Obtain the delta chain for a revision.
705
708
706 ``stoprev`` specifies a revision to stop at. If not specified, we
709 ``stoprev`` specifies a revision to stop at. If not specified, we
707 stop at the base of the chain.
710 stop at the base of the chain.
708
711
709 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
712 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
710 revs in ascending order and ``stopped`` is a bool indicating whether
713 revs in ascending order and ``stopped`` is a bool indicating whether
711 ``stoprev`` was hit.
714 ``stoprev`` was hit.
712 """
715 """
713 # Try C implementation.
716 # Try C implementation.
714 try:
717 try:
715 return self.index.deltachain(rev, stoprev, self._generaldelta)
718 return self.index.deltachain(rev, stoprev, self._generaldelta)
716 except AttributeError:
719 except AttributeError:
717 pass
720 pass
718
721
719 chain = []
722 chain = []
720
723
721 # Alias to prevent attribute lookup in tight loop.
724 # Alias to prevent attribute lookup in tight loop.
722 index = self.index
725 index = self.index
723 generaldelta = self._generaldelta
726 generaldelta = self._generaldelta
724
727
725 iterrev = rev
728 iterrev = rev
726 e = index[iterrev]
729 e = index[iterrev]
727 while iterrev != e[3] and iterrev != stoprev:
730 while iterrev != e[3] and iterrev != stoprev:
728 chain.append(iterrev)
731 chain.append(iterrev)
729 if generaldelta:
732 if generaldelta:
730 iterrev = e[3]
733 iterrev = e[3]
731 else:
734 else:
732 iterrev -= 1
735 iterrev -= 1
733 e = index[iterrev]
736 e = index[iterrev]
734
737
735 if iterrev == stoprev:
738 if iterrev == stoprev:
736 stopped = True
739 stopped = True
737 else:
740 else:
738 chain.append(iterrev)
741 chain.append(iterrev)
739 stopped = False
742 stopped = False
740
743
741 chain.reverse()
744 chain.reverse()
742 return chain, stopped
745 return chain, stopped
743
746
744 def ancestors(self, revs, stoprev=0, inclusive=False):
747 def ancestors(self, revs, stoprev=0, inclusive=False):
745 """Generate the ancestors of 'revs' in reverse topological order.
748 """Generate the ancestors of 'revs' in reverse topological order.
746 Does not generate revs lower than stoprev.
749 Does not generate revs lower than stoprev.
747
750
748 See the documentation for ancestor.lazyancestors for more details."""
751 See the documentation for ancestor.lazyancestors for more details."""
749
752
750 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
753 # first, make sure start revisions aren't filtered
751 inclusive=inclusive)
754 revs = list(revs)
755 checkrev = self.node
756 for r in revs:
757 checkrev(r)
758 # and we're sure ancestors aren't filtered as well
759 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
760 stoprev=stoprev, inclusive=inclusive)
752
761
753 def descendants(self, revs):
762 def descendants(self, revs):
754 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
763 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
755
764
756 def findcommonmissing(self, common=None, heads=None):
765 def findcommonmissing(self, common=None, heads=None):
757 """Return a tuple of the ancestors of common and the ancestors of heads
766 """Return a tuple of the ancestors of common and the ancestors of heads
758 that are not ancestors of common. In revset terminology, we return the
767 that are not ancestors of common. In revset terminology, we return the
759 tuple:
768 tuple:
760
769
761 ::common, (::heads) - (::common)
770 ::common, (::heads) - (::common)
762
771
763 The list is sorted by revision number, meaning it is
772 The list is sorted by revision number, meaning it is
764 topologically sorted.
773 topologically sorted.
765
774
766 'heads' and 'common' are both lists of node IDs. If heads is
775 'heads' and 'common' are both lists of node IDs. If heads is
767 not supplied, uses all of the revlog's heads. If common is not
776 not supplied, uses all of the revlog's heads. If common is not
768 supplied, uses nullid."""
777 supplied, uses nullid."""
769 if common is None:
778 if common is None:
770 common = [nullid]
779 common = [nullid]
771 if heads is None:
780 if heads is None:
772 heads = self.heads()
781 heads = self.heads()
773
782
774 common = [self.rev(n) for n in common]
783 common = [self.rev(n) for n in common]
775 heads = [self.rev(n) for n in heads]
784 heads = [self.rev(n) for n in heads]
776
785
777 # we want the ancestors, but inclusive
786 # we want the ancestors, but inclusive
778 class lazyset(object):
787 class lazyset(object):
779 def __init__(self, lazyvalues):
788 def __init__(self, lazyvalues):
780 self.addedvalues = set()
789 self.addedvalues = set()
781 self.lazyvalues = lazyvalues
790 self.lazyvalues = lazyvalues
782
791
783 def __contains__(self, value):
792 def __contains__(self, value):
784 return value in self.addedvalues or value in self.lazyvalues
793 return value in self.addedvalues or value in self.lazyvalues
785
794
786 def __iter__(self):
795 def __iter__(self):
787 added = self.addedvalues
796 added = self.addedvalues
788 for r in added:
797 for r in added:
789 yield r
798 yield r
790 for r in self.lazyvalues:
799 for r in self.lazyvalues:
791 if not r in added:
800 if not r in added:
792 yield r
801 yield r
793
802
794 def add(self, value):
803 def add(self, value):
795 self.addedvalues.add(value)
804 self.addedvalues.add(value)
796
805
797 def update(self, values):
806 def update(self, values):
798 self.addedvalues.update(values)
807 self.addedvalues.update(values)
799
808
800 has = lazyset(self.ancestors(common))
809 has = lazyset(self.ancestors(common))
801 has.add(nullrev)
810 has.add(nullrev)
802 has.update(common)
811 has.update(common)
803
812
804 # take all ancestors from heads that aren't in has
813 # take all ancestors from heads that aren't in has
805 missing = set()
814 missing = set()
806 visit = collections.deque(r for r in heads if r not in has)
815 visit = collections.deque(r for r in heads if r not in has)
807 while visit:
816 while visit:
808 r = visit.popleft()
817 r = visit.popleft()
809 if r in missing:
818 if r in missing:
810 continue
819 continue
811 else:
820 else:
812 missing.add(r)
821 missing.add(r)
813 for p in self.parentrevs(r):
822 for p in self.parentrevs(r):
814 if p not in has:
823 if p not in has:
815 visit.append(p)
824 visit.append(p)
816 missing = list(missing)
825 missing = list(missing)
817 missing.sort()
826 missing.sort()
818 return has, [self.node(miss) for miss in missing]
827 return has, [self.node(miss) for miss in missing]
819
828
820 def incrementalmissingrevs(self, common=None):
829 def incrementalmissingrevs(self, common=None):
821 """Return an object that can be used to incrementally compute the
830 """Return an object that can be used to incrementally compute the
822 revision numbers of the ancestors of arbitrary sets that are not
831 revision numbers of the ancestors of arbitrary sets that are not
823 ancestors of common. This is an ancestor.incrementalmissingancestors
832 ancestors of common. This is an ancestor.incrementalmissingancestors
824 object.
833 object.
825
834
826 'common' is a list of revision numbers. If common is not supplied, uses
835 'common' is a list of revision numbers. If common is not supplied, uses
827 nullrev.
836 nullrev.
828 """
837 """
829 if common is None:
838 if common is None:
830 common = [nullrev]
839 common = [nullrev]
831
840
832 return ancestor.incrementalmissingancestors(self.parentrevs, common)
841 return ancestor.incrementalmissingancestors(self.parentrevs, common)
833
842
834 def findmissingrevs(self, common=None, heads=None):
843 def findmissingrevs(self, common=None, heads=None):
835 """Return the revision numbers of the ancestors of heads that
844 """Return the revision numbers of the ancestors of heads that
836 are not ancestors of common.
845 are not ancestors of common.
837
846
838 More specifically, return a list of revision numbers corresponding to
847 More specifically, return a list of revision numbers corresponding to
839 nodes N such that every N satisfies the following constraints:
848 nodes N such that every N satisfies the following constraints:
840
849
841 1. N is an ancestor of some node in 'heads'
850 1. N is an ancestor of some node in 'heads'
842 2. N is not an ancestor of any node in 'common'
851 2. N is not an ancestor of any node in 'common'
843
852
844 The list is sorted by revision number, meaning it is
853 The list is sorted by revision number, meaning it is
845 topologically sorted.
854 topologically sorted.
846
855
847 'heads' and 'common' are both lists of revision numbers. If heads is
856 'heads' and 'common' are both lists of revision numbers. If heads is
848 not supplied, uses all of the revlog's heads. If common is not
857 not supplied, uses all of the revlog's heads. If common is not
849 supplied, uses nullid."""
858 supplied, uses nullid."""
850 if common is None:
859 if common is None:
851 common = [nullrev]
860 common = [nullrev]
852 if heads is None:
861 if heads is None:
853 heads = self.headrevs()
862 heads = self.headrevs()
854
863
855 inc = self.incrementalmissingrevs(common=common)
864 inc = self.incrementalmissingrevs(common=common)
856 return inc.missingancestors(heads)
865 return inc.missingancestors(heads)
857
866
858 def findmissing(self, common=None, heads=None):
867 def findmissing(self, common=None, heads=None):
859 """Return the ancestors of heads that are not ancestors of common.
868 """Return the ancestors of heads that are not ancestors of common.
860
869
861 More specifically, return a list of nodes N such that every N
870 More specifically, return a list of nodes N such that every N
862 satisfies the following constraints:
871 satisfies the following constraints:
863
872
864 1. N is an ancestor of some node in 'heads'
873 1. N is an ancestor of some node in 'heads'
865 2. N is not an ancestor of any node in 'common'
874 2. N is not an ancestor of any node in 'common'
866
875
867 The list is sorted by revision number, meaning it is
876 The list is sorted by revision number, meaning it is
868 topologically sorted.
877 topologically sorted.
869
878
870 'heads' and 'common' are both lists of node IDs. If heads is
879 'heads' and 'common' are both lists of node IDs. If heads is
871 not supplied, uses all of the revlog's heads. If common is not
880 not supplied, uses all of the revlog's heads. If common is not
872 supplied, uses nullid."""
881 supplied, uses nullid."""
873 if common is None:
882 if common is None:
874 common = [nullid]
883 common = [nullid]
875 if heads is None:
884 if heads is None:
876 heads = self.heads()
885 heads = self.heads()
877
886
878 common = [self.rev(n) for n in common]
887 common = [self.rev(n) for n in common]
879 heads = [self.rev(n) for n in heads]
888 heads = [self.rev(n) for n in heads]
880
889
881 inc = self.incrementalmissingrevs(common=common)
890 inc = self.incrementalmissingrevs(common=common)
882 return [self.node(r) for r in inc.missingancestors(heads)]
891 return [self.node(r) for r in inc.missingancestors(heads)]
883
892
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

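    # Illustrative sketch (editorial addition, hypothetical values):
    # nodesbetween() carves out the subgraph between two frontiers. On a
    # linear history r0 -> r1 -> r2 with node IDs n0..n2, asking for the path
    # from n1 to n2 drops n0 entirely:
    #
    #     nodes, outroots, outheads = rlog.nodesbetween([n1], [n2])
    #     # nodes == [n1, n2]; outroots == [n1]; outheads == [n2]
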
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

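    # Worked example (editorial addition): on a 3-rev log where rev 1 and
    # rev 2 both have rev 0 as their sole parent, _headrevs() lets each
    # visited rev mark itself as a head, then clear its parents' marks
    # (e[5] and e[6] are the two parent fields of an index entry, and the
    # extra slot at the end of 'ishead' absorbs nullrev via index -1):
    #
    #     rev 0: ishead = [1, 0, 0, 0]   (parents: nullrev -> last slot)
    #     rev 1: ishead = [0, 1, 0, 0]   (parent 0 cleared)
    #     rev 2: ishead = [0, 1, 1, 0]   (parent 0 already clear)
    #
    # leaving revs 1 and 2 as the heads.
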
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned.
        if stop is specified, all the revs in stop are treated
        as if they had no children.
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

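    # Illustrative sketch (editorial addition, hypothetical names): given a
    # revlog 'rlog' with a branch point at node 'b', heads(start=b) limits
    # the answer to heads descending from 'b'. Since every child of a
    # descendant of 'b' is itself a descendant of 'b', those heads are a
    # subset of the global heads:
    #
    #     all_heads = rlog.heads()
    #     branch_heads = rlog.heads(start=b)
    #     assert set(branch_heads) <= set(all_heads)
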
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

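    # Worked example (editorial addition): the early exits above follow from
    # revision numbering being a topological order, so an ancestor's number
    # is never greater than its descendant's. In the remaining case, 'a' is
    # an ancestor of 'b' exactly when 'a' appears among the heads of their
    # common ancestors:
    #
    #     rlog.isancestorrev(3, 3)   # True  (a == b)
    #     rlog.isancestorrev(5, 3)   # False (a > b, cannot be an ancestor)
    #     rlog.isancestorrev(1, 4)   # decided by _commonancestorsheads(1, 4)
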
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

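    # Design note with a sketch (editorial addition, hypothetical values):
    # when several candidates are equally good ancestors, min() over the
    # binary node IDs picks an arbitrary but deterministic winner, so
    # repeated merges make the same choice on every machine:
    #
    #     ancs = [node_x, node_y]   # tied candidates
    #     best = min(ancs)          # stable, content-derived tie-break
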
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

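    # Illustrative sketch (editorial addition, hypothetical prefixes):
    # _partialmatch() resolves hex prefixes the way users type them. The
    # pure-Python fallback narrows candidates by an even-length binary
    # prefix, then re-checks the full hex string so odd-length prefixes
    # work too:
    #
    #     rlog._partialmatch('d4f2a')    # unique -> returns the full node
    #     rlog._partialmatch('ff')       # all-'f' -> may denote the wdir
    #     rlog._partialmatch('0a')       # two matches -> raises
    #                                    # AmbiguousPrefixLookupError
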
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

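    # Illustrative sketch (editorial addition, hypothetical values):
    # shortest() is what lets the UI print abbreviated hashes safely. The
    # result is the smallest prefix that neither collides with another node
    # nor looks like the all-'f' working-directory ID:
    #
    #     rlog.shortest(somenode)               # e.g. 'd4f2'
    #     rlog.shortest(somenode, minlength=8)  # at least 8 characters
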
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

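    # Worked example (editorial addition): with a cache window of
    # cachesize = 65536, a request for offset=70000, length=1000 is widened
    # to the surrounding aligned window:
    #
    #     realoffset = 70000 & ~65535                      # -> 65536
    #     reallength = ((70000 + 1000 + 65536) & ~65535) - 65536
    #                = 131072 - 65536                      # -> 65536
    #
    # so one aligned 64 KiB read serves this request and its neighbours, and
    # util.buffer() slices the caller's 1000 bytes back out of it.
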
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

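    # Worked example (editorial addition): in an inline revlog, index entries
    # and data chunks are interleaved in one file, so data offsets must be
    # shifted past the index records. With a revlog v1 entry size of 64 bytes
    # and a logical data offset of 500 for startrev=3:
    #
    #     start = 500 + (3 + 1) * 64   # -> 756 bytes into the index file
    #
    # For a separate .d file (non-inline), the logical offset is used as-is.
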
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

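    # Illustrative sketch (editorial addition, hypothetical history): a
    # revision is a snapshot when its delta chain never passes through one of
    # its DAG parents. Given a delta chain r0 <- r3 <- r7 where r3's and r7's
    # delta parents differ from their DAG parents:
    #
    #     rlog.issnapshot(0)       # True: stored against nullrev (full text)
    #     rlog.issnapshot(7)       # True: checked recursively down to r0
    #     rlog.snapshotdepth(7)    # 2: chain [r0, r3, r7] has two snapshots
    #                              # before r7
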
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

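    # Illustrative sketch (editorial addition, hypothetical chain):
    # reconstruction is "full text of the chain's base, then apply each delta
    # in order". With a delta chain [4, 6, 9] for rev 9 and no cache hit, the
    # code above effectively computes:
    #
    #     bins = self._chunks([4, 6, 9])
    #     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # and raw=True then skips the flag processors, returning this rawtext
    # as-is.
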
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

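    # Illustrative sketch (editorial addition, hypothetical processor): as the
    # unpacking above shows, a flag processor is a (read, write, raw) triple.
    # Each transform receives the revlog and the text; read/write return
    # (newtext, validatehash), while the raw transform only reports whether
    # the stored rawtext can be hash-checked:
    #
    #     def _readxor(rl, text):
    #         return xordecode(text), True    # 'xordecode' is hypothetical
    #     def _writexor(rl, text):
    #         return xorencode(text), False   # 'xorencode' is hypothetical
    #     def _rawxor(rl, text):
    #         return False                    # rawtext hash never matches
    #
    #     processor = (_readxor, _writexor, _rawxor)
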
1655 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1664 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1656 """Check node hash integrity.
1665 """Check node hash integrity.
1657
1666
1658 Available as a function so that subclasses can extend hash mismatch
1667 Available as a function so that subclasses can extend hash mismatch
1659 behaviors as needed.
1668 behaviors as needed.
1660 """
1669 """
1661 try:
1670 try:
1662 if p1 is None and p2 is None:
1671 if p1 is None and p2 is None:
1663 p1, p2 = self.parents(node)
1672 p1, p2 = self.parents(node)
1664 if node != self.hash(text, p1, p2):
1673 if node != self.hash(text, p1, p2):
1665 # Clear the revision cache on hash failure. The revision cache
1674 # Clear the revision cache on hash failure. The revision cache
1666 # only stores the raw revision and clearing the cache does have
1675 # only stores the raw revision and clearing the cache does have
1667 # the side-effect that we won't have a cache hit when the raw
1676 # the side-effect that we won't have a cache hit when the raw
1668 # revision data is accessed. But this case should be rare and
1677 # revision data is accessed. But this case should be rare and
1669 # it is extra work to teach the cache about the hash
1678 # it is extra work to teach the cache about the hash
1670 # verification state.
1679 # verification state.
1671 if self._revisioncache and self._revisioncache[0] == node:
1680 if self._revisioncache and self._revisioncache[0] == node:
1672 self._revisioncache = None
1681 self._revisioncache = None
1673
1682
1674 revornode = rev
1683 revornode = rev
1675 if revornode is None:
1684 if revornode is None:
1676 revornode = templatefilters.short(hex(node))
1685 revornode = templatefilters.short(hex(node))
1677 raise error.RevlogError(_("integrity check failed on %s:%s")
1686 raise error.RevlogError(_("integrity check failed on %s:%s")
1678 % (self.indexfile, pycompat.bytestr(revornode)))
1687 % (self.indexfile, pycompat.bytestr(revornode)))
1679 except error.RevlogError:
1688 except error.RevlogError:
1680 if self._censorable and storageutil.iscensoredtext(text):
1689 if self._censorable and storageutil.iscensoredtext(text):
1681 raise error.CensoredNodeError(self.indexfile, node, text)
1690 raise error.CensoredNodeError(self.indexfile, node, text)
1682 raise
1691 raise
1683
1692
1684 def _enforceinlinesize(self, tr, fp=None):
1693 def _enforceinlinesize(self, tr, fp=None):
1685 """Check if the revlog is too big for inline and convert if so.
1694 """Check if the revlog is too big for inline and convert if so.
1686
1695
1687 This should be called after revisions are added to the revlog. If the
1696 This should be called after revisions are added to the revlog. If the
1688 revlog has grown too large to be an inline revlog, it will convert it
1697 revlog has grown too large to be an inline revlog, it will convert it
1689 to use multiple index and data files.
1698 to use multiple index and data files.
1690 """
1699 """
1691 tiprev = len(self) - 1
1700 tiprev = len(self) - 1
1692 if (not self._inline or
1701 if (not self._inline or
1693 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1702 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1694 return
1703 return
1695
1704
1696 trinfo = tr.find(self.indexfile)
1705 trinfo = tr.find(self.indexfile)
1697 if trinfo is None:
1706 if trinfo is None:
1698 raise error.RevlogError(_("%s not found in the transaction")
1707 raise error.RevlogError(_("%s not found in the transaction")
1699 % self.indexfile)
1708 % self.indexfile)
1700
1709
1701 trindex = trinfo[2]
1710 trindex = trinfo[2]
1702 if trindex is not None:
1711 if trindex is not None:
1703 dataoff = self.start(trindex)
1712 dataoff = self.start(trindex)
1704 else:
1713 else:
1705 # revlog was stripped at start of transaction, use all leftover data
1714 # revlog was stripped at start of transaction, use all leftover data
1706 trindex = len(self) - 1
1715 trindex = len(self) - 1
1707 dataoff = self.end(tiprev)
1716 dataoff = self.end(tiprev)
1708
1717
1709 tr.add(self.datafile, dataoff)
1718 tr.add(self.datafile, dataoff)
1710
1719
1711 if fp:
1720 if fp:
1712 fp.flush()
1721 fp.flush()
1713 fp.close()
1722 fp.close()
1714
1723
1715 with self._datafp('w') as df:
1724 with self._datafp('w') as df:
1716 for r in self:
1725 for r in self:
1717 df.write(self._getsegmentforrevs(r, r)[1])
1726 df.write(self._getsegmentforrevs(r, r)[1])
1718
1727
1719 with self._indexfp('w') as fp:
1728 with self._indexfp('w') as fp:
1720 self.version &= ~FLAG_INLINE_DATA
1729 self.version &= ~FLAG_INLINE_DATA
1721 self._inline = False
1730 self._inline = False
1722 io = self._io
1731 io = self._io
1723 for i in self:
1732 for i in self:
1724 e = io.packentry(self.index[i], self.node, self.version, i)
1733 e = io.packentry(self.index[i], self.node, self.version, i)
1725 fp.write(e)
1734 fp.write(e)
1726
1735
1727 # the temp file replace the real index when we exit the context
1736 # the temp file replace the real index when we exit the context
1728 # manager
1737 # manager
1729
1738
1730 tr.replace(self.indexfile, trindex * self._io.size)
1739 tr.replace(self.indexfile, trindex * self._io.size)
1731 self._chunkclear()
1740 self._chunkclear()
1732
1741
1733 def _nodeduplicatecallback(self, transaction, node):
1742 def _nodeduplicatecallback(self, transaction, node):
1734 """called when trying to add a node already stored.
1743 """called when trying to add a node already stored.
1735 """
1744 """
1736
1745
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

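    # Editor's note (illustrative sketch, not part of the original file): a
    # typical caller appends a revision inside an open transaction. The names
    # ``rl``, ``tr`` and ``linkrev`` below are hypothetical:
    #
    #     p1 = rl.node(len(rl) - 1)       # current tip as first parent
    #     node = rl.addrevision(b'new content', tr, linkrev, p1, nullid)
    #     # re-adding the same (text, p1, p2) is a no-op: it hashes to a
    #     # node already present in nodemap, which is returned as-is
    #     assert rl.addrevision(b'new content', tr, linkrev, p1, nullid) == node
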
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

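    # Editor's note (illustrative sketch, not part of the original file): the
    # (header, data) pairs returned by compress() encode how a chunk is
    # stored, assuming a zlib-backed revlog ``rl``:
    #
    #     h, d = rl.compress(b'x' * 1024)   # compressible: h == '' and d
    #                                       # carries zlib's own 'x' header
    #     h, d = rl.compress(b'plain')      # incompressible: ('u', b'plain'),
    #                                       # stored verbatim behind 'u'
    #     # data already starting with '\0' is stored as-is with h == '',
    #     # since '\0' is itself a recognized "uncompressed" marker
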
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines.
        #
        # It follows that we want to optimize for the "decompress compressed
        # data encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

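    # Editor's note (illustrative sketch, not part of the original file):
    # compress() and decompress() are inverses across all the header forms
    # dispatched on above. Assuming a revlog instance ``rl``:
    #
    #     for payload in (b'', b'plain text', b'\0raw-ish', b'x' * 4096):
    #         h, d = rl.compress(payload)
    #         assert bytes(rl.decompress(h + d)) == payload
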
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

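    # Editor's note (illustrative, not part of the original file): each index
    # entry appended above is an 8-tuple; a field-by-field summary of the
    # ``e = (...)`` construction in _addrevision():
    #
    #     e = (offset_type(offset, flags),  # data offset << 16 | rev flags
    #          deltainfo.deltalen,          # length of the stored chunk
    #          textlen,                     # uncompressed rawtext length
    #          deltainfo.base,              # revision the delta applies to
    #          link,                        # linkrev into the changelog
    #          p1r, p2r,                    # parent revision numbers
    #          node)                        # this revision's nodeid
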
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

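    # Editor's note (illustrative sketch, not part of the original file):
    # each item consumed by addgroup() is a 7-tuple, unpacked at the top of
    # the loop above. The names ``rl``, ``tr``, ``cl`` and the nodes below
    # are hypothetical:
    #
    #     deltas = [
    #         (node, p1, p2, linknode,
    #          deltabase,           # node the binary patch applies against
    #          delta,               # mdiff-style binary patch data
    #          0),                  # flags; 0 falls back to the defaults
    #     ]
    #     added = rl.addgroup(deltas, cl.rev, tr)
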
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

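    # Editor's note (illustrative, not part of the original file): the ">lll"
    # header checked above is the standard mdiff hunk prefix: (start, end,
    # newlength) as three big-endian 32-bit ints. A sketch of the
    # full-replacement shape the heuristic looks for:
    #
    #     hlen = struct.calcsize(">lll")                    # 12 bytes
    #     header = struct.pack(">lll", 0, oldlen, newlen)   # replace it all
    #     delta = header + b"\1\ncensored:" + tombstone_body
    #     # a delta of this shape replaces the whole base text, so the
    #     # censored-metadata prefix is visible without applying the patch
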
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

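    # Editor's note (illustrative sketch, not part of the original file):
    # callers typically consult getstrippoint() before calling strip(),
    # defined below. With ``rl``, ``tr`` and ``minlink`` hypothetical:
    #
    #     rev, brokenrevs = rl.getstrippoint(minlink)
    #     if rev < len(rl):              # something will be removed
    #         rl.strip(minlink, tr)      # truncate index and data on disk
    #         assert len(rl) == rev
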
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

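    # Editor's note (illustrative, not part of the original file): the
    # (dd, di) pair returned above feeds verifyintegrity() below; dd is the
    # data file's size discrepancy in bytes, di the index file's. A healthy
    # revlog reports no discrepancy:
    #
    #     dd, di = rl.checksize()
    #     assert (dd, di) == (0, 0)      # on-disk sizes match the index
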
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False):
        if nodesorder not in ('nodes', 'storage', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            sendfulltext=not self._storedeltachains,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltaprevious=deltaprevious)

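    # Editor's note (illustrative sketch, not part of the original file):
    # draining the generator yields one delta-or-fulltext descriptor per
    # requested node. The exact attributes come from the irevisiondelta
    # interface; ``rl`` and ``process`` are hypothetical:
    #
    #     for rd in rl.emitrevisions([node1, node2], revisiondata=True):
    #         process(rd.node, rd.basenode, rd.delta, rd.revision)
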
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If it is None, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

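    # Editor's note (illustrative sketch, not part of the original file): a
    # hedged example of re-encoding a revlog with freshly computed deltas;
    # ``opener`` and ``tr`` are hypothetical:
    #
    #     dest = revlog(opener, 'data/foo.i.new')
    #     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
    #     # dest holds the same nodes, but deltas were recomputed, so the
    #     # resulting files may not be byte-for-byte identical to the source
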
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex(self.version, None)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                        | common | rename | meta  | ext
            # -------------------------------------------------------
            #  flags()               | 0      | 0      | 0     | not 0
            #  renamed()             | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

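    # Editor's note (illustrative sketch, not part of the original file): a
    # hedged example of consuming the generator, with the ``state`` keys
    # taken from the checks above and ``ui`` hypothetical:
    #
    #     state = {'expectedversion': rl.version & 0xFFFF,
    #              'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         ui.warn(problem.error or problem.warning)
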
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
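
    # Editor's note (illustrative, not part of the original file): each
    # keyword toggles one key in the returned dict, so callers pay only for
    # the statistics they ask for. A short sketch:
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # e.g. {'revisionscount': 42, 'storedsize': 16384}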