storageutil: extract revision number iteration...
Gregory Szorc
r39917:d6315361 default
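The change below replaces the open-coded bounds logic in revlog.revs() with a
call to a shared helper in storageutil. A minimal sketch of that helper,
inferred from the removed lines (the name iterrevs and the storelen parameter
come from the new call site; the body is assumed to match the code it
replaces, and pycompat is assumed to be imported in storageutil):

    def iterrevs(storelen, start=0, stop=None):
        """Iterate over revision numbers in a store."""
        # Iterate forward by default; iterate backward when start > stop,
        # clamping the end of the range to the store length.
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > storelen:
                stop = storelen
        else:
            stop = storelen
        return pycompat.xrange(start, stop, step)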
@@ -1,2704 +1,2694 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
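
# A hypothetical registration following the contract documented above (the
# names _readfn/_writefn/_rawfn are illustrative, not part of this module):
#
#     def _readfn(self, rawtext):
#         return rawtext, False
#     def _writefn(self, text):
#         return text, False
#     def _rawfn(self, rawtext):
#         return False
#     addflagprocessor(REVIDX_EXTSTORED, (_readfn, _writefn, _rawfn))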

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
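
# For example, offset_type(32, 0) packs to 32 << 16 == 0x200000, and
# getoffset()/gettype() recover (32, 0) from the packed value.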

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
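
# A v0 index entry is therefore indexformatv0.size == 4 * 4 + 3 * 20 == 76
# bytes of fixed-width data.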

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
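
# An "ng" index entry is indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64
# bytes; for revision 0, packentry() below overlays the first 4 bytes with
# the revlog version header.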

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
-        step = 1
-        length = len(self)
-        if stop is not None:
-            if start > stop:
-                step = -1
-            stop += step
-            if stop > length:
-                stop = length
-        else:
-            stop = length
-        return pycompat.xrange(start, stop, step)
+        return storageutil.iterrevs(len(self), start=start, stop=stop)

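    # Note: the extracted helper preserves the reversed-iteration behavior of
    # the removed lines, e.g. revs(start=3, stop=1) still yields 3, 2, 1.
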
530 @util.propertycache
520 @util.propertycache
531 def nodemap(self):
521 def nodemap(self):
532 if self.index:
522 if self.index:
533 # populate mapping down to the initial node
523 # populate mapping down to the initial node
534 node0 = self.index[0][7] # get around changelog filtering
524 node0 = self.index[0][7] # get around changelog filtering
535 self.rev(node0)
525 self.rev(node0)
536 return self._nodecache
526 return self._nodecache
537
527
538 def hasnode(self, node):
528 def hasnode(self, node):
539 try:
529 try:
540 self.rev(node)
530 self.rev(node)
541 return True
531 return True
542 except KeyError:
532 except KeyError:
543 return False
533 return False
544
534
545 def candelta(self, baserev, rev):
535 def candelta(self, baserev, rev):
546 """whether two revisions (baserev, rev) can be delta-ed or not"""
536 """whether two revisions (baserev, rev) can be delta-ed or not"""
547 # Disable delta if either rev requires a content-changing flag
537 # Disable delta if either rev requires a content-changing flag
548 # processor (ex. LFS). This is because such flag processor can alter
538 # processor (ex. LFS). This is because such flag processor can alter
549 # the rawtext content that the delta will be based on, and two clients
539 # the rawtext content that the delta will be based on, and two clients
550 # could have a same revlog node with different flags (i.e. different
540 # could have a same revlog node with different flags (i.e. different
551 # rawtext contents) and the delta could be incompatible.
541 # rawtext contents) and the delta could be incompatible.
552 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
542 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
553 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
543 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
554 return False
544 return False
555 return True
545 return True
556
546
557 def clearcaches(self):
547 def clearcaches(self):
558 self._cache = None
548 self._cache = None
559 self._chainbasecache.clear()
549 self._chainbasecache.clear()
560 self._chunkcache = (0, '')
550 self._chunkcache = (0, '')
561 self._pcache = {}
551 self._pcache = {}
562
552
563 try:
553 try:
564 self._nodecache.clearcaches()
554 self._nodecache.clearcaches()
565 except AttributeError:
555 except AttributeError:
566 self._nodecache = {nullid: nullrev}
556 self._nodecache = {nullid: nullrev}
567 self._nodepos = None
557 self._nodepos = None
568
558
569 def rev(self, node):
559 def rev(self, node):
570 try:
560 try:
571 return self._nodecache[node]
561 return self._nodecache[node]
572 except TypeError:
562 except TypeError:
573 raise
563 raise
574 except error.RevlogError:
564 except error.RevlogError:
575 # parsers.c radix tree lookup failed
565 # parsers.c radix tree lookup failed
576 if node == wdirid or node in wdirfilenodeids:
566 if node == wdirid or node in wdirfilenodeids:
577 raise error.WdirUnsupported
567 raise error.WdirUnsupported
578 raise error.LookupError(node, self.indexfile, _('no node'))
568 raise error.LookupError(node, self.indexfile, _('no node'))
579 except KeyError:
569 except KeyError:
580 # pure python cache lookup failed
570 # pure python cache lookup failed
581 n = self._nodecache
571 n = self._nodecache
582 i = self.index
572 i = self.index
583 p = self._nodepos
573 p = self._nodepos
584 if p is None:
574 if p is None:
585 p = len(i) - 1
575 p = len(i) - 1
586 else:
576 else:
587 assert p < len(i)
577 assert p < len(i)
588 for r in pycompat.xrange(p, -1, -1):
578 for r in pycompat.xrange(p, -1, -1):
589 v = i[r][7]
579 v = i[r][7]
590 n[v] = r
580 n[v] = r
591 if v == node:
581 if v == node:
592 self._nodepos = r - 1
582 self._nodepos = r - 1
593 return r
583 return r
594 if node == wdirid or node in wdirfilenodeids:
584 if node == wdirid or node in wdirfilenodeids:
595 raise error.WdirUnsupported
585 raise error.WdirUnsupported
596 raise error.LookupError(node, self.indexfile, _('no node'))
586 raise error.LookupError(node, self.indexfile, _('no node'))
597
587
598 # Accessors for index entries.
588 # Accessors for index entries.
599
589
600 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
590 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
601 # are flags.
591 # are flags.
602 def start(self, rev):
592 def start(self, rev):
603 return int(self.index[rev][0] >> 16)
593 return int(self.index[rev][0] >> 16)
604
594
605 def flags(self, rev):
595 def flags(self, rev):
606 return self.index[rev][0] & 0xFFFF
596 return self.index[rev][0] & 0xFFFF
607
597
608 def length(self, rev):
598 def length(self, rev):
609 return self.index[rev][1]
599 return self.index[rev][1]
610
600
611 def rawsize(self, rev):
601 def rawsize(self, rev):
612 """return the length of the uncompressed text for a given revision"""
602 """return the length of the uncompressed text for a given revision"""
613 l = self.index[rev][2]
603 l = self.index[rev][2]
614 if l >= 0:
604 if l >= 0:
615 return l
605 return l
616
606
617 t = self.revision(rev, raw=True)
607 t = self.revision(rev, raw=True)
618 return len(t)
608 return len(t)
619
609
620 def size(self, rev):
610 def size(self, rev):
621 """length of non-raw text (processed by a "read" flag processor)"""
611 """length of non-raw text (processed by a "read" flag processor)"""
622 # fast path: if no "read" flag processor could change the content,
612 # fast path: if no "read" flag processor could change the content,
623 # size is rawsize. note: ELLIPSIS is known to not change the content.
613 # size is rawsize. note: ELLIPSIS is known to not change the content.
624 flags = self.flags(rev)
614 flags = self.flags(rev)
625 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
615 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
626 return self.rawsize(rev)
616 return self.rawsize(rev)
627
617
628 return len(self.revision(rev, raw=False))
618 return len(self.revision(rev, raw=False))
629
619
630 def chainbase(self, rev):
620 def chainbase(self, rev):
631 base = self._chainbasecache.get(rev)
621 base = self._chainbasecache.get(rev)
632 if base is not None:
622 if base is not None:
633 return base
623 return base
634
624
635 index = self.index
625 index = self.index
636 iterrev = rev
626 iterrev = rev
637 base = index[iterrev][3]
627 base = index[iterrev][3]
638 while base != iterrev:
628 while base != iterrev:
639 iterrev = base
629 iterrev = base
640 base = index[iterrev][3]
630 base = index[iterrev][3]
641
631
642 self._chainbasecache[rev] = base
632 self._chainbasecache[rev] = base
643 return base
633 return base
644
634
645 def linkrev(self, rev):
635 def linkrev(self, rev):
646 return self.index[rev][4]
636 return self.index[rev][4]
647
637
648 def parentrevs(self, rev):
638 def parentrevs(self, rev):
649 try:
639 try:
650 entry = self.index[rev]
640 entry = self.index[rev]
651 except IndexError:
641 except IndexError:
652 if rev == wdirrev:
642 if rev == wdirrev:
653 raise error.WdirUnsupported
643 raise error.WdirUnsupported
654 raise
644 raise
655
645
656 return entry[5], entry[6]
646 return entry[5], entry[6]
657
647
658 def node(self, rev):
648 def node(self, rev):
659 try:
649 try:
660 return self.index[rev][7]
650 return self.index[rev][7]
661 except IndexError:
651 except IndexError:
662 if rev == wdirrev:
652 if rev == wdirrev:
663 raise error.WdirUnsupported
653 raise error.WdirUnsupported
664 raise
654 raise
665
655
666 # Derived from index values.
656 # Derived from index values.
667
657
668 def end(self, rev):
658 def end(self, rev):
669 return self.start(rev) + self.length(rev)
659 return self.start(rev) + self.length(rev)
670
660
671 def parents(self, node):
661 def parents(self, node):
672 i = self.index
662 i = self.index
673 d = i[self.rev(node)]
663 d = i[self.rev(node)]
674 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
664 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
675
665
676 def chainlen(self, rev):
666 def chainlen(self, rev):
677 return self._chaininfo(rev)[0]
667 return self._chaininfo(rev)[0]
678
668
679 def _chaininfo(self, rev):
669 def _chaininfo(self, rev):
680 chaininfocache = self._chaininfocache
670 chaininfocache = self._chaininfocache
681 if rev in chaininfocache:
671 if rev in chaininfocache:
682 return chaininfocache[rev]
672 return chaininfocache[rev]
683 index = self.index
673 index = self.index
684 generaldelta = self._generaldelta
674 generaldelta = self._generaldelta
685 iterrev = rev
675 iterrev = rev
686 e = index[iterrev]
676 e = index[iterrev]
687 clen = 0
677 clen = 0
688 compresseddeltalen = 0
678 compresseddeltalen = 0
689 while iterrev != e[3]:
679 while iterrev != e[3]:
690 clen += 1
680 clen += 1
691 compresseddeltalen += e[1]
681 compresseddeltalen += e[1]
692 if generaldelta:
682 if generaldelta:
693 iterrev = e[3]
683 iterrev = e[3]
694 else:
684 else:
695 iterrev -= 1
685 iterrev -= 1
696 if iterrev in chaininfocache:
686 if iterrev in chaininfocache:
697 t = chaininfocache[iterrev]
687 t = chaininfocache[iterrev]
698 clen += t[0]
688 clen += t[0]
699 compresseddeltalen += t[1]
689 compresseddeltalen += t[1]
700 break
690 break
701 e = index[iterrev]
691 e = index[iterrev]
702 else:
692 else:
703 # Add text length of base since decompressing that also takes
693 # Add text length of base since decompressing that also takes
704 # work. For cache hits the length is already included.
694 # work. For cache hits the length is already included.
705 compresseddeltalen += e[1]
695 compresseddeltalen += e[1]
706 r = (clen, compresseddeltalen)
696 r = (clen, compresseddeltalen)
707 chaininfocache[rev] = r
697 chaininfocache[rev] = r
708 return r
698 return r
709
699
710 def _deltachain(self, rev, stoprev=None):
700 def _deltachain(self, rev, stoprev=None):
711 """Obtain the delta chain for a revision.
701 """Obtain the delta chain for a revision.
712
702
713 ``stoprev`` specifies a revision to stop at. If not specified, we
703 ``stoprev`` specifies a revision to stop at. If not specified, we
714 stop at the base of the chain.
704 stop at the base of the chain.
715
705
716 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
706 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
717 revs in ascending order and ``stopped`` is a bool indicating whether
707 revs in ascending order and ``stopped`` is a bool indicating whether
718 ``stoprev`` was hit.
708 ``stoprev`` was hit.
719 """
709 """
720 # Try C implementation.
710 # Try C implementation.
721 try:
711 try:
722 return self.index.deltachain(rev, stoprev, self._generaldelta)
712 return self.index.deltachain(rev, stoprev, self._generaldelta)
723 except AttributeError:
713 except AttributeError:
724 pass
714 pass
725
715
726 chain = []
716 chain = []
727
717
728 # Alias to prevent attribute lookup in tight loop.
718 # Alias to prevent attribute lookup in tight loop.
729 index = self.index
719 index = self.index
730 generaldelta = self._generaldelta
720 generaldelta = self._generaldelta
731
721
732 iterrev = rev
722 iterrev = rev
733 e = index[iterrev]
723 e = index[iterrev]
734 while iterrev != e[3] and iterrev != stoprev:
724 while iterrev != e[3] and iterrev != stoprev:
735 chain.append(iterrev)
725 chain.append(iterrev)
736 if generaldelta:
726 if generaldelta:
737 iterrev = e[3]
727 iterrev = e[3]
738 else:
728 else:
739 iterrev -= 1
729 iterrev -= 1
740 e = index[iterrev]
730 e = index[iterrev]
741
731
742 if iterrev == stoprev:
732 if iterrev == stoprev:
743 stopped = True
733 stopped = True
744 else:
734 else:
745 chain.append(iterrev)
735 chain.append(iterrev)
746 stopped = False
736 stopped = False
747
737
748 chain.reverse()
738 chain.reverse()
749 return chain, stopped
739 return chain, stopped
750
740
751 def ancestors(self, revs, stoprev=0, inclusive=False):
741 def ancestors(self, revs, stoprev=0, inclusive=False):
752 """Generate the ancestors of 'revs' in reverse topological order.
742 """Generate the ancestors of 'revs' in reverse topological order.
753 Does not generate revs lower than stoprev.
743 Does not generate revs lower than stoprev.
754
744
755 See the documentation for ancestor.lazyancestors for more details."""
745 See the documentation for ancestor.lazyancestors for more details."""
756
746
757 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
747 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
758 inclusive=inclusive)
748 inclusive=inclusive)
759
749
760 def descendants(self, revs):
750 def descendants(self, revs):
761 """Generate the descendants of 'revs' in revision order.
751 """Generate the descendants of 'revs' in revision order.
762
752
763 Yield a sequence of revision numbers starting with a child of
753 Yield a sequence of revision numbers starting with a child of
764 some rev in revs, i.e., each revision is *not* considered a
754 some rev in revs, i.e., each revision is *not* considered a
765 descendant of itself. Results are ordered by revision number (a
755 descendant of itself. Results are ordered by revision number (a
766 topological sort)."""
756 topological sort)."""
767 first = min(revs)
757 first = min(revs)
768 if first == nullrev:
758 if first == nullrev:
769 for i in self:
759 for i in self:
770 yield i
760 yield i
771 return
761 return
772
762
773 seen = set(revs)
763 seen = set(revs)
774 for i in self.revs(start=first + 1):
764 for i in self.revs(start=first + 1):
775 for x in self.parentrevs(i):
765 for x in self.parentrevs(i):
776 if x != nullrev and x in seen:
766 if x != nullrev and x in seen:
777 seen.add(i)
767 seen.add(i)
778 yield i
768 yield i
779 break
769 break
780
770
781 def findcommonmissing(self, common=None, heads=None):
771 def findcommonmissing(self, common=None, heads=None):
782 """Return a tuple of the ancestors of common and the ancestors of heads
772 """Return a tuple of the ancestors of common and the ancestors of heads
783 that are not ancestors of common. In revset terminology, we return the
773 that are not ancestors of common. In revset terminology, we return the
784 tuple:
774 tuple:
785
775
786 ::common, (::heads) - (::common)
776 ::common, (::heads) - (::common)
787
777
788 The list is sorted by revision number, meaning it is
778 The list is sorted by revision number, meaning it is
789 topologically sorted.
779 topologically sorted.
790
780
791 'heads' and 'common' are both lists of node IDs. If heads is
781 'heads' and 'common' are both lists of node IDs. If heads is
792 not supplied, uses all of the revlog's heads. If common is not
782 not supplied, uses all of the revlog's heads. If common is not
793 supplied, uses nullid."""
783 supplied, uses nullid."""
794 if common is None:
784 if common is None:
795 common = [nullid]
785 common = [nullid]
796 if heads is None:
786 if heads is None:
797 heads = self.heads()
787 heads = self.heads()
798
788
799 common = [self.rev(n) for n in common]
789 common = [self.rev(n) for n in common]
800 heads = [self.rev(n) for n in heads]
790 heads = [self.rev(n) for n in heads]
801
791
802 # we want the ancestors, but inclusive
792 # we want the ancestors, but inclusive
803 class lazyset(object):
793 class lazyset(object):
804 def __init__(self, lazyvalues):
794 def __init__(self, lazyvalues):
805 self.addedvalues = set()
795 self.addedvalues = set()
806 self.lazyvalues = lazyvalues
796 self.lazyvalues = lazyvalues
807
797
808 def __contains__(self, value):
798 def __contains__(self, value):
809 return value in self.addedvalues or value in self.lazyvalues
799 return value in self.addedvalues or value in self.lazyvalues
810
800
811 def __iter__(self):
801 def __iter__(self):
812 added = self.addedvalues
802 added = self.addedvalues
813 for r in added:
803 for r in added:
814 yield r
804 yield r
815 for r in self.lazyvalues:
805 for r in self.lazyvalues:
816 if not r in added:
806 if not r in added:
817 yield r
807 yield r
818
808
819 def add(self, value):
809 def add(self, value):
820 self.addedvalues.add(value)
810 self.addedvalues.add(value)
821
811
822 def update(self, values):
812 def update(self, values):
823 self.addedvalues.update(values)
813 self.addedvalues.update(values)
824
814
825 has = lazyset(self.ancestors(common))
815 has = lazyset(self.ancestors(common))
826 has.add(nullrev)
816 has.add(nullrev)
827 has.update(common)
817 has.update(common)
828
818
829 # take all ancestors from heads that aren't in has
819 # take all ancestors from heads that aren't in has
830 missing = set()
820 missing = set()
831 visit = collections.deque(r for r in heads if r not in has)
821 visit = collections.deque(r for r in heads if r not in has)
832 while visit:
822 while visit:
833 r = visit.popleft()
823 r = visit.popleft()
834 if r in missing:
824 if r in missing:
835 continue
825 continue
836 else:
826 else:
837 missing.add(r)
827 missing.add(r)
838 for p in self.parentrevs(r):
828 for p in self.parentrevs(r):
839 if p not in has:
829 if p not in has:
840 visit.append(p)
830 visit.append(p)
841 missing = list(missing)
831 missing = list(missing)
842 missing.sort()
832 missing.sort()
843 return has, [self.node(miss) for miss in missing]
833 return has, [self.node(miss) for miss in missing]
844
834
845 def incrementalmissingrevs(self, common=None):
835 def incrementalmissingrevs(self, common=None):
846 """Return an object that can be used to incrementally compute the
836 """Return an object that can be used to incrementally compute the
847 revision numbers of the ancestors of arbitrary sets that are not
837 revision numbers of the ancestors of arbitrary sets that are not
848 ancestors of common. This is an ancestor.incrementalmissingancestors
838 ancestors of common. This is an ancestor.incrementalmissingancestors
849 object.
839 object.
850
840
851 'common' is a list of revision numbers. If common is not supplied, uses
841 'common' is a list of revision numbers. If common is not supplied, uses
852 nullrev.
842 nullrev.
853 """
843 """
854 if common is None:
844 if common is None:
855 common = [nullrev]
845 common = [nullrev]
856
846
857 return ancestor.incrementalmissingancestors(self.parentrevs, common)
847 return ancestor.incrementalmissingancestors(self.parentrevs, common)
858
848
859 def findmissingrevs(self, common=None, heads=None):
849 def findmissingrevs(self, common=None, heads=None):
860 """Return the revision numbers of the ancestors of heads that
850 """Return the revision numbers of the ancestors of heads that
861 are not ancestors of common.
851 are not ancestors of common.
862
852
863 More specifically, return a list of revision numbers corresponding to
853 More specifically, return a list of revision numbers corresponding to
864 nodes N such that every N satisfies the following constraints:
854 nodes N such that every N satisfies the following constraints:
865
855
866 1. N is an ancestor of some node in 'heads'
856 1. N is an ancestor of some node in 'heads'
867 2. N is not an ancestor of any node in 'common'
857 2. N is not an ancestor of any node in 'common'
868
858
869 The list is sorted by revision number, meaning it is
859 The list is sorted by revision number, meaning it is
870 topologically sorted.
860 topologically sorted.
871
861
872 'heads' and 'common' are both lists of revision numbers. If heads is
862 'heads' and 'common' are both lists of revision numbers. If heads is
873 not supplied, uses all of the revlog's heads. If common is not
863 not supplied, uses all of the revlog's heads. If common is not
874 supplied, uses nullid."""
864 supplied, uses nullid."""
875 if common is None:
865 if common is None:
876 common = [nullrev]
866 common = [nullrev]
877 if heads is None:
867 if heads is None:
878 heads = self.headrevs()
868 heads = self.headrevs()
879
869
880 inc = self.incrementalmissingrevs(common=common)
870 inc = self.incrementalmissingrevs(common=common)
881 return inc.missingancestors(heads)
871 return inc.missingancestors(heads)
882
872
883 def findmissing(self, common=None, heads=None):
873 def findmissing(self, common=None, heads=None):
884 """Return the ancestors of heads that are not ancestors of common.
874 """Return the ancestors of heads that are not ancestors of common.
885
875
886 More specifically, return a list of nodes N such that every N
876 More specifically, return a list of nodes N such that every N
887 satisfies the following constraints:
877 satisfies the following constraints:
888
878
889 1. N is an ancestor of some node in 'heads'
879 1. N is an ancestor of some node in 'heads'
890 2. N is not an ancestor of any node in 'common'
880 2. N is not an ancestor of any node in 'common'
891
881
892 The list is sorted by revision number, meaning it is
882 The list is sorted by revision number, meaning it is
893 topologically sorted.
883 topologically sorted.
894
884
895 'heads' and 'common' are both lists of node IDs. If heads is
885 'heads' and 'common' are both lists of node IDs. If heads is
896 not supplied, uses all of the revlog's heads. If common is not
886 not supplied, uses all of the revlog's heads. If common is not
897 supplied, uses nullid."""
887 supplied, uses nullid."""
898 if common is None:
888 if common is None:
899 common = [nullid]
889 common = [nullid]
900 if heads is None:
890 if heads is None:
901 heads = self.heads()
891 heads = self.heads()
902
892
903 common = [self.rev(n) for n in common]
893 common = [self.rev(n) for n in common]
904 heads = [self.rev(n) for n in heads]
894 heads = [self.rev(n) for n in heads]
905
895
906 inc = self.incrementalmissingrevs(common=common)
896 inc = self.incrementalmissingrevs(common=common)
907 return [self.node(r) for r in inc.missingancestors(heads)]
897 return [self.node(r) for r in inc.missingancestors(heads)]
908
898
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

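    # Illustrative call (a sketch; `rl`, `r` and `h` are hypothetical):
    #
    #   nodes, outroots, outheads = rl.nodesbetween(roots=[r], heads=[h])
    #   # `nodes` is every node that is both a descendant of `r` and an
    #   # ancestor of `h`, in topological order; `outroots`/`outheads` are
    #   # the members of the inputs that actually made it into `nodes`.
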
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

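    # How the pure-Python fallback above behaves on a tiny linear history
    # 0 <- 1 <- 2 (an illustration, not code from this module): every rev
    # visited sets ishead[r] = 1, then clears the flag for its two parent
    # revisions (fields e[5] and e[6] of the index entry), so after the
    # loop only rev 2 -- the sole childless revision -- is still set.
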
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

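    # Usage sketch (hypothetical node values n and m):
    #
    #   rl.heads()           # all childless nodes
    #   rl.heads(start=n)    # only heads that are descendants of n
    #   rl.heads(stop=[m])   # treat m as if it had no children
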
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

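    # Sketch: children() is the per-node inverse of parents():
    #
    #   for child in rl.children(node):
    #       assert node in rl.parents(child)
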
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

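    # The `a > b` early return above is sound because revision numbers are
    # assigned in topological order: an ancestor can never have a higher
    # revision number than its descendant. Sketch:
    #
    #   rl.isancestorrev(2, 5)   # may need a commonancestorsheads() walk
    #   rl.isancestorrev(5, 2)   # False immediately, no graph walk at all
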
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

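    # Usage sketch (hypothetical nodes n1 and n2):
    #
    #   best = rl.ancestor(n1, n2)
    #   # nullid when the nodes share no ancestor; otherwise the smallest
    #   # node among tied "best" candidates, so repeated calls with the
    #   # same inputs always return the same answer.
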
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

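    # Usage sketch: the returned prefix round-trips through lookup():
    #
    #   prefix = rl.shortest(node)
    #   assert rl.lookup(prefix) == node
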
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

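    # Worked example of the window arithmetic above, assuming a chunk cache
    # size of 65536 (the usual default; the masking requires a power of two):
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                             # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536   # -> 65536
    #   # one aligned 64k window [65536, 131072) is read and cached, and a
    #   # zero-copy buffer of the requested 100 bytes is returned.
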
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

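    # For inline revlogs the index entries and revision data are interleaved
    # in a single file, which is why the offsets above are shifted by
    # (rev + 1) index-entry sizes. Sketch, assuming the 64-byte v1 entry size:
    #
    #   start = int(index[startrev][0] >> 16)   # offset among data bytes only
    #   start += (startrev + 1) * 64            # skip the interleaved entries
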
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

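    # The two storage modes distinguished above, as a sketch:
    #
    #   generaldelta:     the delta base is whatever revision the index
    #                     recorded (often a parent), so chains follow the graph
    #   pre-generaldelta: the base is implicitly the previous revision on
    #                     disk, rev - 1, regardless of the graph shape
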
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

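    # Put differently (a sketch of the outcomes above): a revision is a
    # snapshot when its delta base is nullrev (a full text), or when the
    # base is not one of its parents and is itself a snapshot; a delta
    # against a parent is an ordinary delta, not a snapshot.
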
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

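    # Shape of a flag processor entry as consumed above (a sketch; the names
    # and SOME_REVIDX_FLAG are illustrative, not defined in this module):
    #
    #   def _readtransform(rl, text):    # -> (newtext, validatehash)
    #       return text, True
    #   def _writetransform(rl, text):   # -> (newtext, validatehash)
    #       return text, True
    #   def _rawtransform(rl, text):     # -> validatehash only
    #       return True
    #
    #   self._flagprocessors[SOME_REVIDX_FLAG] = (
    #       _readtransform, _writetransform, _rawtransform)
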
1689 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1679 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1690 """Check node hash integrity.
1680 """Check node hash integrity.
1691
1681
1692 Available as a function so that subclasses can extend hash mismatch
1682 Available as a function so that subclasses can extend hash mismatch
1693 behaviors as needed.
1683 behaviors as needed.
1694 """
1684 """
1695 try:
1685 try:
1696 if p1 is None and p2 is None:
1686 if p1 is None and p2 is None:
1697 p1, p2 = self.parents(node)
1687 p1, p2 = self.parents(node)
1698 if node != self.hash(text, p1, p2):
1688 if node != self.hash(text, p1, p2):
1699 revornode = rev
1689 revornode = rev
1700 if revornode is None:
1690 if revornode is None:
1701 revornode = templatefilters.short(hex(node))
1691 revornode = templatefilters.short(hex(node))
1702 raise error.RevlogError(_("integrity check failed on %s:%s")
1692 raise error.RevlogError(_("integrity check failed on %s:%s")
1703 % (self.indexfile, pycompat.bytestr(revornode)))
1693 % (self.indexfile, pycompat.bytestr(revornode)))
1704 except error.RevlogError:
1694 except error.RevlogError:
1705 if self._censorable and storageutil.iscensoredtext(text):
1695 if self._censorable and storageutil.iscensoredtext(text):
1706 raise error.CensoredNodeError(self.indexfile, node, text)
1696 raise error.CensoredNodeError(self.indexfile, node, text)
1707 raise
1697 raise
1708
1698
1709 def _enforceinlinesize(self, tr, fp=None):
1699 def _enforceinlinesize(self, tr, fp=None):
1710 """Check if the revlog is too big for inline and convert if so.
1700 """Check if the revlog is too big for inline and convert if so.
1711
1701
1712 This should be called after revisions are added to the revlog. If the
1702 This should be called after revisions are added to the revlog. If the
1713 revlog has grown too large to be an inline revlog, it will convert it
1703 revlog has grown too large to be an inline revlog, it will convert it
1714 to use multiple index and data files.
1704 to use multiple index and data files.
1715 """
1705 """
1716 tiprev = len(self) - 1
1706 tiprev = len(self) - 1
1717 if (not self._inline or
1707 if (not self._inline or
1718 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1708 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1719 return
1709 return
1720
1710
1721 trinfo = tr.find(self.indexfile)
1711 trinfo = tr.find(self.indexfile)
1722 if trinfo is None:
1712 if trinfo is None:
1723 raise error.RevlogError(_("%s not found in the transaction")
1713 raise error.RevlogError(_("%s not found in the transaction")
1724 % self.indexfile)
1714 % self.indexfile)
1725
1715
1726 trindex = trinfo[2]
1716 trindex = trinfo[2]
1727 if trindex is not None:
1717 if trindex is not None:
1728 dataoff = self.start(trindex)
1718 dataoff = self.start(trindex)
1729 else:
1719 else:
1730 # revlog was stripped at start of transaction, use all leftover data
1720 # revlog was stripped at start of transaction, use all leftover data
1731 trindex = len(self) - 1
1721 trindex = len(self) - 1
1732 dataoff = self.end(tiprev)
1722 dataoff = self.end(tiprev)
1733
1723
1734 tr.add(self.datafile, dataoff)
1724 tr.add(self.datafile, dataoff)
1735
1725
1736 if fp:
1726 if fp:
1737 fp.flush()
1727 fp.flush()
1738 fp.close()
1728 fp.close()
1739
1729
1740 with self._datafp('w') as df:
1730 with self._datafp('w') as df:
1741 for r in self:
1731 for r in self:
1742 df.write(self._getsegmentforrevs(r, r)[1])
1732 df.write(self._getsegmentforrevs(r, r)[1])
1743
1733
1744 with self._indexfp('w') as fp:
1734 with self._indexfp('w') as fp:
1745 self.version &= ~FLAG_INLINE_DATA
1735 self.version &= ~FLAG_INLINE_DATA
1746 self._inline = False
1736 self._inline = False
1747 io = self._io
1737 io = self._io
1748 for i in self:
1738 for i in self:
1749 e = io.packentry(self.index[i], self.node, self.version, i)
1739 e = io.packentry(self.index[i], self.node, self.version, i)
1750 fp.write(e)
1740 fp.write(e)
1751
1741
1752 # the temp file replace the real index when we exit the context
1742 # the temp file replace the real index when we exit the context
1753 # manager
1743 # manager
1754
1744
1755 tr.replace(self.indexfile, trindex * self._io.size)
1745 tr.replace(self.indexfile, trindex * self._io.size)
1756 self._chunkclear()
1746 self._chunkclear()
1757
1747
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

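    # A minimal usage sketch (illustrative only; ``rl``, ``tr``, the linkrev
    # and the parent nodes are assumed to come from the caller, e.g. a filelog
    # and its repository's transaction). With no explicit node, addrevision()
    # derives the nodeid from content and parents, and re-adding the same
    # revision is a no-op:
    #
    #   node = rl.addrevision(b'file content', tr, linkrev, p1node, p2node)
    #   assert node == rl.hash(b'file content', p1node, p2node)
    #   assert rl.addrevision(b'file content', tr, linkrev,
    #                         p1node, p2node) == node
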
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

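    # Illustrative sketch of the header-byte contract between compress() and
    # decompress() (not part of the revlog API; ``rl`` is assumed to be a
    # zlib-backed revlog). compress() returns a (header, payload) pair and
    # decompress() routes on the first byte of their concatenation: 'x' marks
    # zlib data, 'u' marks data stored uncompressed, and '\0' marks raw data
    # needing no prefix at all.
    #
    #   header, payload = rl.compress(chunk)
    #   stored = header + payload    # what ends up in the revlog
    #   assert rl.decompress(stored) == chunk
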
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

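    # A standalone sketch of the append-mode workaround described in
    # _writeentry() (hypothetical file name; nothing revlog-specific). On
    # platforms where 'a+' handles misbehave after a read, an explicit seek
    # to EOF before each write restores append semantics:
    #
    #   import os
    #   fh = open('some.dat', 'a+b')
    #   fh.seek(0)
    #   fh.read(12)                 # handle position is now mid-file
    #   fh.seek(0, os.SEEK_END)     # defensive: force append position
    #   fh.write(b'new entry')
    #   fh.close()
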
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

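    # Shape of the ``deltas`` entries consumed by addgroup() (a sketch; the
    # values are placeholders). Each item is a 7-tuple whose ``delta`` is
    # expressed against ``deltabase``, and ``linkmapper`` translates the
    # linknode into a local changelog revision number:
    #
    #   deltas = [
    #       (node, p1node, p2node, linknode, deltabase, deltabytes, flags),
    #       ...
    #   ]
    #   added = rl.addgroup(deltas, cl.rev, tr, addrevisioncb=None)
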
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

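    # The full-replacement header checked above is a single ">lll" patch
    # triple (start=0, end=oldlen, newlen): one hunk replacing the entire
    # base text. A sketch of the equivalent check with plain struct, assuming
    # mdiff.replacediffheader packs exactly that triple:
    #
    #   import struct
    #   hlen = struct.calcsize(">lll")
    #   header = struct.pack(">lll", 0, oldlen, len(delta) - hlen)
    #   isfullreplace = delta[:hlen] == header
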
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

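    # Usage sketch (illustrative): to strip everything linked to changelog
    # revision 42 or later, first ask where to truncate:
    #
    #   strippoint, brokenrevs = rl.getstrippoint(42)
    #   # revs >= strippoint will be truncated by strip(); brokenrevs are the
    #   # truncated revs whose linkrev is < 42, i.e. revisions the caller is
    #   # expected to save and re-add afterwards.
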
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

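    # Worked sketch of the truncation offsets for a non-inline revlog with
    # strippoint ``rev`` (numbers hypothetical; revlogv1 index entries are a
    # fixed 64 bytes, i.e. self._io.size):
    #
    #   rev = 10
    #   dataend = rl.start(rev)         # cut .d at rev 10's data offset
    #   indexend = rev * rl._io.size    # keep 10 * 64 == 640 index bytes
    #
    # For inline revlogs there is no .d file, so the single cut point is
    # rl.start(rev) + rev * rl._io.size.
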
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

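    # Interpretation sketch for the two values returned by checksize():
    #
    #   dd, di = rl.checksize()
    #   # dd: bytes by which the data file differs from the end offset of
    #   #     the last revision (0 for an intact revlog)
    #   # di: index bytes beyond a whole number of entries (plus, for inline
    #   #     revlogs, any surplus over entries + interleaved data)
    #   assert (dd, di) == (0, 0)    # expected when nothing is corrupt
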
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False):
        if nodesorder not in ('nodes', 'storage', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        frev = self.rev
        fnode = self.node

        if nodesorder == 'nodes':
            revs = [frev(n) for n in nodes]
        elif nodesorder == 'storage':
            revs = sorted(frev(n) for n in nodes)
        else:
            assert self._generaldelta
            revs = set(frev(n) for n in nodes)
            revs = dagop.linearize(revs, self.parentrevs)

        prevrev = None

        if deltaprevious or assumehaveparentrevisions:
            prevrev = self.parentrevs(revs[0])[0]

        # Set of revs available to delta against.
        available = set()

        for rev in revs:
            if rev == nullrev:
                continue

            node = fnode(rev)
            deltaparentrev = self.deltaparent(rev)
            p1rev, p2rev = self.parentrevs(rev)

            # Forced delta against previous mode.
            if deltaprevious:
                baserev = prevrev

            # Revlog is configured to use full snapshots. Stick to that.
            elif not self._storedeltachains:
                baserev = nullrev

            # There is a delta in storage. We try to use that because it
            # amounts to effectively copying data from storage and is
            # therefore the fastest.
            elif deltaparentrev != nullrev:
                # Base revision was already emitted in this group. We can
                # always safely use the delta.
                if deltaparentrev in available:
                    baserev = deltaparentrev

                # Base revision is a parent that hasn't been emitted already.
                # Use it if we can assume the receiver has the parent revision.
                elif (assumehaveparentrevisions
                      and deltaparentrev in (p1rev, p2rev)):
                    baserev = deltaparentrev

                # No guarantee the receiver has the delta parent. Send delta
                # against last revision (if possible), which in the common case
                # should be similar enough to this revision that the delta is
                # reasonable.
                elif prevrev is not None:
                    baserev = prevrev
                else:
                    baserev = nullrev

            # Storage has a fulltext revision.

            # Let's use the previous revision, which is as good a guess as any.
            # There is definitely room to improve this logic.
            elif prevrev is not None:
                baserev = prevrev
            else:
                baserev = nullrev

            # But we can't actually use our chosen delta base for whatever
            # reason. Reset to fulltext.
            if baserev != nullrev and not self.candelta(baserev, rev):
                baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if revisiondata:
                if self.iscensored(baserev) or self.iscensored(rev):
                    try:
                        revision = self.revision(node, raw=True)
                    except error.CensoredNodeError as e:
                        revision = e.tombstone

                    if baserev != nullrev:
                        baserevisionsize = self.rawsize(baserev)

                elif baserev == nullrev and not deltaprevious:
                    revision = self.revision(node, raw=True)
                    available.add(rev)
                else:
                    delta = self.revdiff(baserev, rev)
                    available.add(rev)

            yield revlogrevisiondelta(
                node=node,
                p1node=fnode(p1rev),
                p2node=fnode(p2rev),
                basenode=fnode(baserev),
                flags=self.flags(rev),
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev

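    # Consumption sketch (illustrative): each emitted revlogrevisiondelta
    # carries either a full ``revision`` or a ``delta`` against ``basenode``:
    #
    #   for d in rl.emitrevisions(nodes, revisiondata=True):
    #       if d.delta is not None:
    #           apply_delta(d.basenode, d.delta)      # hypothetical helper
    #       else:
    #           store_fulltext(d.node, d.revision)    # hypothetical helper
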
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new, going through the
           full ``addrevision()`` code path instead of the lower-level
           ``_addrevision()`` one.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If ``deltabothparents`` is None, the destination revlog's current
        setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

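    # Usage sketch (names hypothetical): copy into an empty destination
    # revlog, reusing stored deltas only where the destination would pick
    # the same bases:
    #
    #   dest = revlog(destopener, 'data/foo.i')
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)
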
    def censorrevision(self, node, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        rev = self.rev(node)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(rev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Using two files instead of one makes it easy to rewrite entry-by-entry
        idxread = self.opener(self.indexfile, 'r')
        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
        if self.version & FLAG_INLINE_DATA:
            dataread, datawrite = idxread, idxwrite
        else:
            dataread = self.opener(self.datafile, 'r')
            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

        # Copy all revlog data up to the entry to be censored.
        offset = self.start(rev)

        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
            flags = gettype(offlags)
            if newdata:
                comp, uncomp, nflags = newdata
                flags |= nflags
            offlags = offset_type(newoffs, flags)
            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
            idxwrite.write(self._io.packentry(e, None, self.version, r))
            idxread.seek(self._io.size, 1)

        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
            """Write the given fulltext with the given data offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = self.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            dataread.seek(self.length(r), 1)
            return newcomp

        # Rewrite censored entry with (padded) tombstone data.
        pad = ' ' * (self.rawsize(rev) - len(tombstone))
        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

        # Rewrite all following filelog revisions fixing up offsets and deltas.
        for srev in pycompat.xrange(rev + 1, len(self)):
            if rev in self.parentrevs(srev):
                # Immediate children of censored node must be re-added as
                # fulltext.
                try:
                    revdata = self.revision(srev)
                except error.CensoredNodeError as e:
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = self.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen

        idxread.close()
        idxwrite.close()
        if dataread is not idxread:
            dataread.close()
            datawrite.close()

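    # Tombstone layout sketch: the packed metadata must fit within the
    # censored revision's original rawtext so all following data offsets stay
    # valid, hence the space padding up to rawsize() (illustrative; the exact
    # byte layout comes from storageutil.packmeta):
    #
    #   tombstone = storageutil.packmeta({b'censored': b'reason'}, b'')
    #   # roughly b'\x01\ncensored: reason\n\x01\n'
    #   padded = tombstone + b' ' * (rl.rawsize(rev) - len(tombstone))
    #   assert len(padded) == rl.rawsize(rev)
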
2578 def verifyintegrity(self, state):
2568 def verifyintegrity(self, state):
2579 """Verifies the integrity of the revlog.
2569 """Verifies the integrity of the revlog.
2580
2570
2581 Yields ``revlogproblem`` instances describing problems that are
2571 Yields ``revlogproblem`` instances describing problems that are
2582 found.
2572 found.
2583 """
2573 """
2584 dd, di = self.checksize()
2574 dd, di = self.checksize()
2585 if dd:
2575 if dd:
2586 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2576 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2587 if di:
2577 if di:
2588 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2578 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2589
2579
2590 version = self.version & 0xFFFF
2580 version = self.version & 0xFFFF
2591
2581
2592 # The verifier tells us what version revlog we should be.
2582 # The verifier tells us what version revlog we should be.
2593 if version != state['expectedversion']:
2583 if version != state['expectedversion']:
2594 yield revlogproblem(
2584 yield revlogproblem(
2595 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2585 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2596 (self.indexfile, version, state['expectedversion']))
2586 (self.indexfile, version, state['expectedversion']))
2597
2587
2598 state['skipread'] = set()
        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #
            #   1. L1: rawsize, stored in revlog index
            #   2. L2: len(rawtext), stored in revlog data
            #   3. L3: len(text), stored in revlog data if flags==0, or
            #      possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #
            #   1. length check: L1 == L2, in all cases.
            #   2. hash check: depending on flag processor, we may need to
            #      use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node), e),
                    node=node)
                state['skipread'].add(node)
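
A short sketch of how a caller is expected to drive this loop. The enclosing
method name (``verifyintegrity`` in this series) and the ``rl`` revlog
instance are assumptions for illustration; the ``state`` keys and
``REVIDX_EXTSTORED`` all appear in the code above:

    # Seed the shared state so externally-stored revisions are not
    # content-checked, only recorded in 'skipread' for later passes.
    state = {'skipflags': REVIDX_EXTSTORED,
             'erroroncensored': True,
             'skipread': set()}
    for problem in rl.verifyintegrity(state):
        # Each yielded revlogproblem carries at least 'error' and 'node'.
        print('%s: %s' % (problem.node, problem.error))
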
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
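
Each key appears in the returned dict only when the corresponding argument is
true, so callers pay only for the metrics they request. A usage sketch
(``rl`` is an assumed revlog instance):

    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    # trackedsize sums rawsize() over every revision (logical bytes
    # tracked), while storedsize reports actual on-disk file sizes.
    print(info['revisionscount'], info['trackedsize'])
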
@@ -1,83 +1,101 @@
# storageutil.py - Storage functionality agnostic of backend implementation.
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import hashlib
import re

from ..node import (
    nullid,
)
from .. import (
    pycompat,
)

_nullhash = hashlib.sha1(nullid)

def hashrevisionsha1(text, p1, p2):
    """Compute the SHA-1 for revision data and its parents.

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # neither of the parent nodes is nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()
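
A quick sanity check of the convention above (a sketch; the import path
follows from revlog.py importing this module as ``.utils.storageutil``):

    import hashlib
    from mercurial.node import nullid
    from mercurial.utils.storageutil import hashrevisionsha1

    p1 = hashlib.sha1(b'parent one').digest()
    p2 = hashlib.sha1(b'parent two').digest()

    # Parents are hashed in sorted order, so the digest is symmetric:
    assert hashrevisionsha1(b'text', p1, p2) == hashrevisionsha1(b'text', p2, p1)

    # With a null p2, only p1 and the text contribute (the fast path):
    assert (hashrevisionsha1(b'text', p1, nullid) ==
            hashlib.sha1(nullid + p1 + b'text').digest())
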
METADATA_RE = re.compile(b'\x01\n')

def parsemeta(text):
    """Parse metadata header from revision data.

    Returns a 2-tuple of (metadata, offset), where both can be None if there
    is no metadata.
    """
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != b'\x01\n':
        return None, None
    s = METADATA_RE.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(b': ', 1)
        meta[k] = v
    return meta, s + 2

def packmeta(meta, text):
    """Add metadata to fulltext to produce revision text."""
    keys = sorted(meta)
    metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
    return b'\x01\n%s\x01\n%s' % (metatext, text)
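
A round-trip sketch of the header format these two functions implement
(the key/value pairs below are purely illustrative):

    meta = {b'copy': b'old-name', b'copyrev': b'0' * 40}
    rawtext = packmeta(meta, b'file contents\n')
    # rawtext is b'\x01\n' + sorted 'key: value' lines + b'\x01\n' + text

    parsed, offset = parsemeta(rawtext)
    assert parsed == meta
    assert rawtext[offset:] == b'file contents\n'
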
def iscensoredtext(text):
    meta = parsemeta(text)[0]
    return meta and b'censored' in meta
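
For example (a sketch using only the functions defined in this module):

    assert iscensoredtext(packmeta({b'censored': b'tombstone'}, b''))
    assert not iscensoredtext(b'ordinary file contents\n')
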
def filtermetadata(text):
    """Extract just the revision data from source text.

    Returns ``text`` unless it has a metadata header, in which case we return
    a new buffer without the metadata.
    """
    if not text.startswith(b'\x01\n'):
        return text

    offset = text.index(b'\x01\n', 2)
    return text[offset + 2:]
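
In other words, for any header produced by packmeta() the two functions are
complementary: filtermetadata(packmeta(meta, text)) gives back text, e.g.:

    assert filtermetadata(packmeta({b'censored': b'x'}, b'data')) == b'data'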

def iterrevs(storelen, start=0, stop=None):
    """Iterate over revision numbers in a store."""
    step = 1

    if stop is not None:
        if start > stop:
            step = -1
        stop += step
        if stop > storelen:
            stop = storelen
    else:
        stop = storelen

    return pycompat.xrange(start, stop, step)
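
The descending case is why ``stop`` is adjusted by ``step`` before clamping:
the bound stays inclusive in either direction. A few illustrative calls:

    list(iterrevs(5))                    # [0, 1, 2, 3, 4]
    list(iterrevs(5, start=2))           # [2, 3, 4]
    list(iterrevs(5, start=0, stop=2))   # [0, 1, 2]   stop is inclusive
    list(iterrevs(5, start=3, stop=1))   # [3, 2, 1]   descending iteration
    list(iterrevs(5, start=0, stop=99))  # [0, 1, 2, 3, 4]   clamped to store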