revlog: move loading of index data into own method...
Gregory Szorc
r40091:0a4625ff default
@@ -1,2557 +1,2560 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, that basically means the "rawtext" returned by
      "write" should be used to generate the hash. Usually, "write" and "read"
      return different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

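# Example: a minimal sketch of registering a custom flag processor. The
# REVIDX_EXAMPLE flag and the base64 transform below are hypothetical,
# for illustration only; a real flag must first be added to
# REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER. The callables follow the
# (read, write, raw) signatures documented above:
#
#     import base64
#
#     def examplereadprocessor(rl, rawtext):
#         # stored form -> user-visible form; False: don't hash this result
#         return base64.b64decode(rawtext), False
#
#     def examplewriteprocessor(rl, text):
#         # user-visible form -> stored form
#         return base64.b64encode(text), False
#
#     def examplerawprocessor(rl, rawtext):
#         # rawtext alone is not usable for hash integrity checks
#         return False
#
#     addflagprocessor(REVIDX_EXAMPLE, (examplereadprocessor,
#                                       examplewriteprocessor,
#                                       examplerawprocessor))
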
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

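# Example: the first field of a v1 index entry packs the data offset into
# the upper 48 bits and the revision flags into the lower 16 bits, so
# getoffset()/gettype() invert offset_type(). An illustrative round-trip:
#
#     >>> packed = offset_type(1048576, REVIDX_ISCENSORED)
#     >>> getoffset(packed)
#     1048576
#     >>> gettype(packed) == REVIDX_ISCENSORED
#     True
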
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

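# Example: v0 entries are fixed-size records, so a parser can walk the
# index in indexformatv0.size byte steps (4 * 4 bytes of integers plus
# three 20-byte nodeids = 76 bytes). An illustrative round-trip with a
# dummy entry:
#
#     >>> indexformatv0.size
#     76
#     >>> fields = indexformatv0_unpack(indexformatv0_pack(
#     ...     0, 0, 0, 0, b'\0' * 20, b'\0' * 20, b'\0' * 20))
#     >>> len(fields)
#     7
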
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

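# Example: why packentry() special-cases rev 0. The first index entry's
# offset is always 0, so its leading 4 bytes are free to double as the
# revlog version header, which _loadindex() later reads back via
# versionformat_unpack(indexdata[:4]). An illustrative sketch with a
# dummy entry:
#
#     >>> entry = (offset_type(0, 0), 30, 60, 0, 0, -1, -1, b'\0' * 20)
#     >>> packed = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
#     >>> versionformat_unpack(packed[:4])[0] == REVLOG_DEFAULT_VERSION
#     True
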
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        self._loadindex(v, mmapindexthreshold)

    def _loadindex(self, v, mmapindexthreshold):
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

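    # Example: _datareadfp() is meant to be used from other methods of this
    # class; an illustrative sketch (offset and length are hypothetical):
    #
    #     with self._datareadfp() as fp:
    #         fp.seek(offset)
    #         data = fp.read(length)
    #
    # Passing an existing file object reuses it across several reads instead
    # of opening a fresh handle for each one.
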
    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

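    # Example: rev() is the inverse of node(); the pure-Python fallback
    # above memoizes every node it scans, so repeated lookups get cheaper.
    # An illustrative sketch, assuming "rl" is a populated revlog:
    #
    #     >>> rl.rev(rl.node(0))
    #     0
    #     >>> rl.hasnode(b'\x01' * 20)  # unknown node
    #     False
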
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

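    # Example: an illustrative sketch of _deltachain(), assuming a revlog
    # where rev 5 deltas against rev 3, rev 3 against rev 1, and rev 1 is a
    # full snapshot (its delta base is itself):
    #
    #     >>> rl._deltachain(5)
    #     ([1, 3, 5], False)
    #     >>> rl._deltachain(5, stoprev=3)  # stopped before the chain base
    #     ([5], True)
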
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

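    # Example: an illustrative sketch of findcommonmissing(), assuming "rl"
    # is a populated revlog and c/h are node IDs. The first element lazily
    # answers "is this rev an ancestor of common?"; the second is
    # '(::heads) - (::common)' as nodes in topological order:
    #
    #     >>> has, missing = rl.findcommonmissing(common=[c], heads=[h])
    #     >>> rl.rev(c) in has
    #     True
    #     >>> all(rl.rev(n) not in has for n in missing)
    #     True
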
817 def incrementalmissingrevs(self, common=None):
820 def incrementalmissingrevs(self, common=None):
818 """Return an object that can be used to incrementally compute the
821 """Return an object that can be used to incrementally compute the
819 revision numbers of the ancestors of arbitrary sets that are not
822 revision numbers of the ancestors of arbitrary sets that are not
820 ancestors of common. This is an ancestor.incrementalmissingancestors
823 ancestors of common. This is an ancestor.incrementalmissingancestors
821 object.
824 object.
822
825
823 'common' is a list of revision numbers. If common is not supplied, uses
826 'common' is a list of revision numbers. If common is not supplied, uses
824 nullrev.
827 nullrev.
825 """
828 """
826 if common is None:
829 if common is None:
827 common = [nullrev]
830 common = [nullrev]
828
831
829 return ancestor.incrementalmissingancestors(self.parentrevs, common)
832 return ancestor.incrementalmissingancestors(self.parentrevs, common)
830
833
831 def findmissingrevs(self, common=None, heads=None):
834 def findmissingrevs(self, common=None, heads=None):
832 """Return the revision numbers of the ancestors of heads that
835 """Return the revision numbers of the ancestors of heads that
833 are not ancestors of common.
836 are not ancestors of common.
834
837
835 More specifically, return a list of revision numbers corresponding to
838 More specifically, return a list of revision numbers corresponding to
836 nodes N such that every N satisfies the following constraints:
839 nodes N such that every N satisfies the following constraints:
837
840
838 1. N is an ancestor of some node in 'heads'
841 1. N is an ancestor of some node in 'heads'
839 2. N is not an ancestor of any node in 'common'
842 2. N is not an ancestor of any node in 'common'
840
843
841 The list is sorted by revision number, meaning it is
844 The list is sorted by revision number, meaning it is
842 topologically sorted.
845 topologically sorted.
843
846
844 'heads' and 'common' are both lists of revision numbers. If heads is
847 'heads' and 'common' are both lists of revision numbers. If heads is
845 not supplied, uses all of the revlog's heads. If common is not
848 not supplied, uses all of the revlog's heads. If common is not
846 supplied, uses nullid."""
849 supplied, uses nullid."""
847 if common is None:
850 if common is None:
848 common = [nullrev]
851 common = [nullrev]
849 if heads is None:
852 if heads is None:
850 heads = self.headrevs()
853 heads = self.headrevs()
851
854
852 inc = self.incrementalmissingrevs(common=common)
855 inc = self.incrementalmissingrevs(common=common)
853 return inc.missingancestors(heads)
856 return inc.missingancestors(heads)
854
857
855 def findmissing(self, common=None, heads=None):
858 def findmissing(self, common=None, heads=None):
856 """Return the ancestors of heads that are not ancestors of common.
859 """Return the ancestors of heads that are not ancestors of common.
857
860
858 More specifically, return a list of nodes N such that every N
861 More specifically, return a list of nodes N such that every N
859 satisfies the following constraints:
862 satisfies the following constraints:
860
863
861 1. N is an ancestor of some node in 'heads'
864 1. N is an ancestor of some node in 'heads'
862 2. N is not an ancestor of any node in 'common'
865 2. N is not an ancestor of any node in 'common'
863
866
864 The list is sorted by revision number, meaning it is
867 The list is sorted by revision number, meaning it is
865 topologically sorted.
868 topologically sorted.
866
869
867 'heads' and 'common' are both lists of node IDs. If heads is
870 'heads' and 'common' are both lists of node IDs. If heads is
868 not supplied, uses all of the revlog's heads. If common is not
871 not supplied, uses all of the revlog's heads. If common is not
869 supplied, uses nullid."""
872 supplied, uses nullid."""
870 if common is None:
873 if common is None:
871 common = [nullid]
874 common = [nullid]
872 if heads is None:
875 if heads is None:
873 heads = self.heads()
876 heads = self.heads()
874
877
875 common = [self.rev(n) for n in common]
878 common = [self.rev(n) for n in common]
876 heads = [self.rev(n) for n in heads]
879 heads = [self.rev(n) for n in heads]
877
880
878 inc = self.incrementalmissingrevs(common=common)
881 inc = self.incrementalmissingrevs(common=common)
879 return [self.node(r) for r in inc.missingancestors(heads)]
882 return [self.node(r) for r in inc.missingancestors(heads)]
880
883
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

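    # Usage sketch (illustrative, not part of the upstream file): given a
    # revlog ``rl`` and nodes ``r`` and ``h`` where r is an ancestor of h,
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[r], heads=[h])
    #   assert nodes[0] == r and nodes[-1] == h   # topological (rev) order
    #
    # 'outroots' and 'outheads' report which of the supplied endpoints were
    # actually reachable, which matters when several roots or heads are given.
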
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

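    # The pure-Python fallback above works in one pass: every revision starts
    # out marked as a head and each revision clears the mark on both of its
    # parents. A standalone sketch of the same idea on toy data (assumed,
    # not revlog internals):
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}  # 1 and 2 fork off 0
    #   ishead = dict.fromkeys(parents, True)
    #   for r, (p1, p2) in parents.items():
    #       ishead[p1] = ishead[p2] = False   # a parent is never a head
    #   assert sorted(r for r, v in ishead.items() if v and r >= 0) == [1, 2]
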
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

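    # Usage sketch (illustrative): there is no reverse index to lean on, so
    # children() scans every revision after the argument, i.e. O(len(revlog)):
    #
    #   for child in rl.children(node):
    #       ...   # every returned node lists ``node`` among its parents
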
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

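    # Why the final membership test above is correct: ``a`` is an ancestor of
    # ``b`` exactly when ``a`` is itself one of the heads of the common
    # ancestors of ``a`` and ``b`` (every common ancestor is an ancestor of
    # ``a``, so only ``a`` can dominate that set). The ``a > b`` early return
    # holds because revision numbers are topologically ordered: an ancestor
    # never has a higher revision number than its descendant.
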
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

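    # Usage sketch (illustrative): for a merge whose parents ``x`` and ``y``
    # descend from a single common ancestor ``b``:
    #
    #   assert rl.ancestor(x, y) == b
    #
    # When several "best" ancestors tie, min() over the binary node hashes
    # picks an arbitrary but stable winner, so repeated calls agree.
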
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

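    # Usage sketch (illustrative): the accepted identifier forms are
    #
    #   rl.lookup(0)            # revision number
    #   rl.lookup(b'0')         # str(revision number)
    #   rl.lookup(hex(n))       # full 40-character hex nodeid
    #   rl.lookup(hex(n)[:6])   # unambiguous hex prefix
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError; an unknown
    # identifier raises LookupError.
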
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

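    # Usage sketch (illustrative): the result always round-trips through
    # lookup(), and minlength acts as a floor on the returned length:
    #
    #   prefix = rl.shortest(node)
    #   assert rl.lookup(prefix) == node
    #   assert len(rl.shortest(node, minlength=8)) >= 8
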
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
            self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

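    # The masking above relies on _chunkcachesize being a power of two:
    # ``& ~(cachesize - 1)`` rounds the start down and the end up to aligned
    # boundaries. Worked example with cachesize = 65536, offset = 70000 and
    # length = 100:
    #
    #   realoffset = 70000 & ~65535                       # 65536
    #   reallength = ((70100 + 65536) & ~65535) - 65536   # 65536
    #
    # so one aligned 64KiB block is read and the requested 100 bytes are
    # returned as a buffer into it.
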
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

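    # For inline revlogs the index entries are interleaved with the data in
    # the same file, so the offsets above are shifted by one index-entry size
    # per preceding revision: with the 64-byte v1 entry, revision 3's data
    # starts at start(3) + 4 * 64 bytes into the file (illustrative figures).
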
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

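    # Illustrative walk of the two methods above on a toy chain (assumed
    # data): rev 0 stores a full snapshot, rev 1 deltas against its parent
    # rev 0, and rev 2 deltas against its parent rev 1. Then:
    #
    #   rl.issnapshot(0)      # True: its delta parent is nullrev
    #   rl.issnapshot(1)      # False: it deltas against one of its parents
    #   rl.snapshotdepth(0)   # 0: no snapshot precedes it in the chain
    #
    # The recursion in issnapshot() also catches intermediate snapshots,
    # i.e. revisions whose delta parent is a snapshot that is not a parent.
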
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

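    # Usage sketch (illustrative): the two common call patterns are
    #
    #   text = rl.revision(node)           # fulltext after read transforms
    #   raw  = rl.revision(rev, raw=True)  # stored rawtext, e.g. for bundles
    #
    # Either a node or a revision number is accepted. With raw=True the read
    # transforms are skipped and the raw flag processors only decide whether
    # the hash check runs.
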
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else:  # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

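    # Sketch of a flag processor triple as consumed above (illustrative; the
    # flag value and the registration call are assumptions, see the
    # module-level addflagprocessor() helper). The read and write transforms
    # return (newtext, validatehash); the raw transform returns only the
    # boolean:
    #
    #   def _compressedread(rl, text):
    #       return zlib.decompress(text), True    # fulltext matches the node
    #   def _compressedwrite(rl, text):
    #       return zlib.compress(text), False     # stored rawtext does not
    #   def _compressedraw(rl, text):
    #       return False                          # skip rawtext hash check
    #
    #   # addflagprocessor(SOME_REVIDX_FLAG,
    #   #                  (_compressedread, _compressedwrite, _compressedraw))
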
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

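    # Hypothetical usage sketch (the names 'rl', 'tr', 'linkrev', 'p1node' and
    # 'p2node' are placeholders, not module names): adding one revision and
    # getting its nodeid back, or the existing nodeid if it was already stored.
    #
    #   node = rl.addrevision(b'file content', tr, linkrev, p1node, p2node)
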
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

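    # Illustrative sketch (not part of the original module): whichever branch
    # compress() takes, concatenating its two return values yields a chunk
    # that decompress() below restores verbatim. 'rl' stands for any revlog
    # instance here.
    #
    #   header, packed = rl.compress(b'some revision text')
    #   assert bytes(rl.decompress(header + packed)) == b'some revision text'
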
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

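    # Illustrative dispatch on the header byte examined above (zlib streams
    # start with 'x'; other engines register their own one-byte headers):
    #
    #   rl.decompress(b'u' + b'abc')  -> b'abc'     (stored uncompressed)
    #   rl.decompress(b'\0abc')       -> b'\0abc'   (raw data, returned as-is)
    #   rl.decompress(b'x\x9c...')    -> inflated bytes, via zlib
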
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

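    # For reference, the 8-tuple appended to self.index in _addrevision above
    # is, positionally:
    #
    #   (offset_type(offset, flags), compressed length, uncompressed length,
    #    delta base rev, linkrev, p1 rev, p2 rev, nodeid)
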
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to a position before the end. See issue4943
        # for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

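    # Illustrative shape of one item consumed from ``deltas`` by addgroup
    # above; a hypothetical changegroup producer would yield 7-tuples:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where ``delta`` is a binary patch against the revision ``deltabase``
    # and ``flags`` is 0 (REVIDX_DEFAULT_FLAGS) for ordinary revisions.
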
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

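    # A full-replacement delta, as matched above, is a single patch hunk:
    #
    #   struct.pack(">lll", 0, oldlen, newlen) + newfulltext
    #
    # i.e. mdiff.replacediffheader(oldlen, newlen) followed by the complete
    # new text, which for censored revisions begins with "\1\ncensored:".
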
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

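    # Sketch of the strip flow (illustrative; 'rl' is a revlog, 'tr' an open
    # transaction and 'minlink' the first linkrev being removed):
    #
    #   rev, broken = rl.getstrippoint(minlink)   # what would be removed
    #   rl.strip(minlink, tr)                     # truncate revs >= rev
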
    def checksize(self):
        """Check size consistency of the index and data files.

        Returns a (dd, di) tuple: dd is the difference in bytes between the
        actual and expected data file size, and di the number of extra bytes
        in the index file. Both are 0 for a consistent revlog.
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False):
        if nodesorder not in ('nodes', 'storage', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            sendfulltext=not self._storedeltachains,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltaprevious=deltaprevious)

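    # emitrevisions() above delegates to storageutil.emitrevisions(), which
    # yields revlogrevisiondelta objects describing each requested node in
    # the chosen order.
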
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When unset (None), the destination revlog's existing setting is left
        unchanged.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

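    # Hypothetical use of clone() above: copying 'src' into an empty revlog
    # 'dst' while recomputing every delta ('tr' is an open transaction):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
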
    def censorrevision(self, node, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        rev = self.rev(node)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(rev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Using two files instead of one makes it easy to rewrite entry-by-entry
        idxread = self.opener(self.indexfile, 'r')
        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
        if self.version & FLAG_INLINE_DATA:
            dataread, datawrite = idxread, idxwrite
        else:
            dataread = self.opener(self.datafile, 'r')
            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

        # Copy all revlog data up to the entry to be censored.
        offset = self.start(rev)

        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
            flags = gettype(offlags)
            if newdata:
                comp, uncomp, nflags = newdata
                flags |= nflags
            offlags = offset_type(newoffs, flags)
            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
            idxwrite.write(self._io.packentry(e, None, self.version, r))
            idxread.seek(self._io.size, 1)

        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
            """Write the given fulltext with the given data offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = self.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            dataread.seek(self.length(r), 1)
            return newcomp

        # Rewrite censored entry with (padded) tombstone data.
        pad = ' ' * (self.rawsize(rev) - len(tombstone))
        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

        # Rewrite all following filelog revisions fixing up offsets and deltas.
        for srev in pycompat.xrange(rev + 1, len(self)):
            if rev in self.parentrevs(srev):
                # Immediate children of censored node must be re-added as
                # fulltext.
                try:
                    revdata = self.revision(srev)
                except error.CensoredNodeError as e:
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = self.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen

        idxread.close()
        idxwrite.close()
        if dataread is not idxread:
            dataread.close()
            datawrite.close()

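    # Illustrative call replacing a stored revision's content with a padded
    # tombstone (which must be no longer than the censored data):
    #
    #   rl.censorrevision(node, tombstone=b'removed per takedown request')
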
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

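    # Sketch of consuming verifyintegrity() above (illustrative; the 'state'
    # keys shown are the ones this method reads):
    #
    #   state = {'expectedversion': 1, 'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       print(problem.warning or problem.error, problem.node)
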
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
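
    # Example of querying storage details via storageinfo() (illustrative):
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # -> {'revisionscount': ..., 'trackedsize': ...}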