upgrade: clarify "aggressivemergedelta" handling...
Boris Feld - r40872:64051af1 default
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

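# Illustrative sketch (not part of the original module): what a registration
# through addflagprocessor() might look like for a hypothetical flag
# REVIDX_EXAMPLE (assumed, for the example, to be defined in
# REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER). The three callables follow the
# (read, write, raw) signatures documented above:
#
#     def examplereadproc(rl, rawtext):
#         return rawtext, True   # text shown to the user; True: hashable
#
#     def examplewriteproc(rl, text):
#         return text, True      # rawtext to store; True: hash the rawtext
#
#     def examplerawproc(rl, rawtext):
#         return True            # rawtext usable for hash integrity checks
#
#     addflagprocessor(REVIDX_EXAMPLE,
#                      (examplereadproc, examplewriteproc, examplerawproc))
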
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

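# For example, an index entry whose data starts at byte offset 1000 of the
# data file and carries no flags packs as:
#
#     q = offset_type(1000, 0)   # 1000 << 16 == 65536000
#     getoffset(q)               # -> 1000
#     gettype(q)                 # -> 0
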
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

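# A v0 entry is thus 76 bytes: four 4-byte big-endian signed integers
# followed by three 20-byte nodeids, i.e. indexformatv0.size == 76.
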
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

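# An ng entry is 64 bytes: an 8-byte offset/flags word, six 4-byte integers,
# and a 20-byte nodeid padded with 12 bytes to the 32 bytes reserved in the
# layout above, i.e. indexformatng.size == 64.
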
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

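# Note on packentry() above: the index file has no separate header, so the
# revlog version and feature flags are spliced over the first four bytes of
# entry 0 (the high bytes of its offset, which is always zero for rev 0).
# A sketch of decoding them back, as _loadindex() below effectively does:
#
#     v = versionformat_unpack(indexdata[:4])[0]
#     fmt = v & 0xFFFF            # e.g. REVLOGV1
#     flags = v & ~0xFFFF         # e.g. FLAG_INLINE_DATA | FLAG_GENERALDELTA
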
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

            # revlog v0 doesn't have flag processors
            for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
                _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        self._loadindex(v, mmapindexthreshold)

    def _loadindex(self, v, mmapindexthreshold):
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

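    # Illustrative sketch (hypothetical values): configuration reaches the
    # revlog through ``opener.options``, read in __init__ above; a repository
    # might populate it along these lines before creating revlogs:
    #
    #     opener.options = {
    #         'revlogv1': 1,
    #         'generaldelta': 1,
    #         'sparse-revlog': True,
    #         'maxchainlen': 1000,
    #         'compengine': 'zlib',
    #     }
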
    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

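    # Example: with generaldelta, if rev 5 deltas against rev 3, rev 3
    # deltas against rev 1, and rev 1 is stored as a full snapshot (its
    # delta base is itself), then:
    #
    #     self._deltachain(5)            # -> ([1, 3, 5], False)
    #     self._deltachain(5, stoprev=3) # -> ([5], True), cut at stoprev
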
770 def ancestors(self, revs, stoprev=0, inclusive=False):
770 def ancestors(self, revs, stoprev=0, inclusive=False):
771 """Generate the ancestors of 'revs' in reverse revision order.
771 """Generate the ancestors of 'revs' in reverse revision order.
772 Does not generate revs lower than stoprev.
772 Does not generate revs lower than stoprev.
773
773
774 See the documentation for ancestor.lazyancestors for more details."""
774 See the documentation for ancestor.lazyancestors for more details."""
775
775
776 # first, make sure start revisions aren't filtered
776 # first, make sure start revisions aren't filtered
777 revs = list(revs)
777 revs = list(revs)
778 checkrev = self.node
778 checkrev = self.node
779 for r in revs:
779 for r in revs:
780 checkrev(r)
780 checkrev(r)
781 # and we're sure ancestors aren't filtered as well
781 # and we're sure ancestors aren't filtered as well
782 if util.safehasattr(parsers, 'rustlazyancestors'):
782 if util.safehasattr(parsers, 'rustlazyancestors'):
783 return ancestor.rustlazyancestors(
783 return ancestor.rustlazyancestors(
784 self.index, revs,
784 self.index, revs,
785 stoprev=stoprev, inclusive=inclusive)
785 stoprev=stoprev, inclusive=inclusive)
786 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
786 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
787 stoprev=stoprev, inclusive=inclusive)
787 stoprev=stoprev, inclusive=inclusive)
788
788
789 def descendants(self, revs):
789 def descendants(self, revs):
790 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
790 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
791
791
792 def findcommonmissing(self, common=None, heads=None):
792 def findcommonmissing(self, common=None, heads=None):
793 """Return a tuple of the ancestors of common and the ancestors of heads
793 """Return a tuple of the ancestors of common and the ancestors of heads
794 that are not ancestors of common. In revset terminology, we return the
794 that are not ancestors of common. In revset terminology, we return the
795 tuple:
795 tuple:
796
796
797 ::common, (::heads) - (::common)
797 ::common, (::heads) - (::common)
798
798
799 The list is sorted by revision number, meaning it is
799 The list is sorted by revision number, meaning it is
800 topologically sorted.
800 topologically sorted.
801
801
802 'heads' and 'common' are both lists of node IDs. If heads is
802 'heads' and 'common' are both lists of node IDs. If heads is
803 not supplied, uses all of the revlog's heads. If common is not
803 not supplied, uses all of the revlog's heads. If common is not
804 supplied, uses nullid."""
804 supplied, uses nullid."""
805 if common is None:
805 if common is None:
806 common = [nullid]
806 common = [nullid]
807 if heads is None:
807 if heads is None:
808 heads = self.heads()
808 heads = self.heads()
809
809
810 common = [self.rev(n) for n in common]
810 common = [self.rev(n) for n in common]
811 heads = [self.rev(n) for n in heads]
811 heads = [self.rev(n) for n in heads]
812
812
813 # we want the ancestors, but inclusive
813 # we want the ancestors, but inclusive
814 class lazyset(object):
814 class lazyset(object):
815 def __init__(self, lazyvalues):
815 def __init__(self, lazyvalues):
816 self.addedvalues = set()
816 self.addedvalues = set()
817 self.lazyvalues = lazyvalues
817 self.lazyvalues = lazyvalues
818
818
819 def __contains__(self, value):
819 def __contains__(self, value):
820 return value in self.addedvalues or value in self.lazyvalues
820 return value in self.addedvalues or value in self.lazyvalues
821
821
822 def __iter__(self):
822 def __iter__(self):
823 added = self.addedvalues
823 added = self.addedvalues
824 for r in added:
824 for r in added:
825 yield r
825 yield r
826 for r in self.lazyvalues:
826 for r in self.lazyvalues:
827 if not r in added:
827 if not r in added:
828 yield r
828 yield r
829
829
830 def add(self, value):
830 def add(self, value):
831 self.addedvalues.add(value)
831 self.addedvalues.add(value)
832
832
833 def update(self, values):
833 def update(self, values):
834 self.addedvalues.update(values)
834 self.addedvalues.update(values)
835
835
836 has = lazyset(self.ancestors(common))
836 has = lazyset(self.ancestors(common))
837 has.add(nullrev)
837 has.add(nullrev)
838 has.update(common)
838 has.update(common)
839
839
840 # take all ancestors from heads that aren't in has
840 # take all ancestors from heads that aren't in has
841 missing = set()
841 missing = set()
842 visit = collections.deque(r for r in heads if r not in has)
842 visit = collections.deque(r for r in heads if r not in has)
843 while visit:
843 while visit:
844 r = visit.popleft()
844 r = visit.popleft()
845 if r in missing:
845 if r in missing:
846 continue
846 continue
847 else:
847 else:
848 missing.add(r)
848 missing.add(r)
849 for p in self.parentrevs(r):
849 for p in self.parentrevs(r):
850 if p not in has:
850 if p not in has:
851 visit.append(p)
851 visit.append(p)
852 missing = list(missing)
852 missing = list(missing)
853 missing.sort()
853 missing.sort()
854 return has, [self.node(miss) for miss in missing]
854 return has, [self.node(miss) for miss in missing]
855
855
856 def incrementalmissingrevs(self, common=None):
856 def incrementalmissingrevs(self, common=None):
857 """Return an object that can be used to incrementally compute the
857 """Return an object that can be used to incrementally compute the
858 revision numbers of the ancestors of arbitrary sets that are not
858 revision numbers of the ancestors of arbitrary sets that are not
859 ancestors of common. This is an ancestor.incrementalmissingancestors
859 ancestors of common. This is an ancestor.incrementalmissingancestors
860 object.
860 object.
861
861
862 'common' is a list of revision numbers. If common is not supplied, uses
862 'common' is a list of revision numbers. If common is not supplied, uses
863 nullrev.
863 nullrev.
864 """
864 """
865 if common is None:
865 if common is None:
866 common = [nullrev]
866 common = [nullrev]
867
867
868 return ancestor.incrementalmissingancestors(self.parentrevs, common)
868 return ancestor.incrementalmissingancestors(self.parentrevs, common)
869
869
870 def findmissingrevs(self, common=None, heads=None):
870 def findmissingrevs(self, common=None, heads=None):
871 """Return the revision numbers of the ancestors of heads that
871 """Return the revision numbers of the ancestors of heads that
872 are not ancestors of common.
872 are not ancestors of common.
873
873
874 More specifically, return a list of revision numbers corresponding to
874 More specifically, return a list of revision numbers corresponding to
875 nodes N such that every N satisfies the following constraints:
875 nodes N such that every N satisfies the following constraints:
876
876
877 1. N is an ancestor of some node in 'heads'
877 1. N is an ancestor of some node in 'heads'
878 2. N is not an ancestor of any node in 'common'
878 2. N is not an ancestor of any node in 'common'
879
879
880 The list is sorted by revision number, meaning it is
880 The list is sorted by revision number, meaning it is
881 topologically sorted.
881 topologically sorted.
882
882
883 'heads' and 'common' are both lists of revision numbers. If heads is
883 'heads' and 'common' are both lists of revision numbers. If heads is
884 not supplied, uses all of the revlog's heads. If common is not
884 not supplied, uses all of the revlog's heads. If common is not
885 supplied, uses nullid."""
885 supplied, uses nullid."""
886 if common is None:
886 if common is None:
887 common = [nullrev]
887 common = [nullrev]
888 if heads is None:
888 if heads is None:
889 heads = self.headrevs()
889 heads = self.headrevs()
890
890
891 inc = self.incrementalmissingrevs(common=common)
891 inc = self.incrementalmissingrevs(common=common)
892 return inc.missingancestors(heads)
892 return inc.missingancestors(heads)
893
893
894 def findmissing(self, common=None, heads=None):
894 def findmissing(self, common=None, heads=None):
895 """Return the ancestors of heads that are not ancestors of common.
895 """Return the ancestors of heads that are not ancestors of common.
896
896
897 More specifically, return a list of nodes N such that every N
897 More specifically, return a list of nodes N such that every N
898 satisfies the following constraints:
898 satisfies the following constraints:
899
899
900 1. N is an ancestor of some node in 'heads'
900 1. N is an ancestor of some node in 'heads'
901 2. N is not an ancestor of any node in 'common'
901 2. N is not an ancestor of any node in 'common'
902
902
903 The list is sorted by revision number, meaning it is
903 The list is sorted by revision number, meaning it is
904 topologically sorted.
904 topologically sorted.
905
905
906 'heads' and 'common' are both lists of node IDs. If heads is
906 'heads' and 'common' are both lists of node IDs. If heads is
907 not supplied, uses all of the revlog's heads. If common is not
907 not supplied, uses all of the revlog's heads. If common is not
908 supplied, uses nullid."""
908 supplied, uses nullid."""
909 if common is None:
909 if common is None:
910 common = [nullid]
910 common = [nullid]
911 if heads is None:
911 if heads is None:
912 heads = self.heads()
912 heads = self.heads()
913
913
914 common = [self.rev(n) for n in common]
914 common = [self.rev(n) for n in common]
915 heads = [self.rev(n) for n in heads]
915 heads = [self.rev(n) for n in heads]
916
916
917 inc = self.incrementalmissingrevs(common=common)
917 inc = self.incrementalmissingrevs(common=common)
918 return [self.node(r) for r in inc.missingancestors(heads)]
918 return [self.node(r) for r in inc.missingancestors(heads)]
919
919
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

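    # A small worked example (a sketch; `rl`, `r1`, `x`, `h1` are
    # hypothetical nodes in a loaded revlog): for a linear history
    # r1 -> x -> h1,
    #
    #   nodes, outroots, outheads = rl.nodesbetween([r1], [h1])
    #   # nodes == [r1, x, h1], outroots == [r1], outheads == [h1]
    #
    # since every node counts as its own ancestor and descendant, both
    # endpoints appear in 'nodes'.
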
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

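    # Sketch of the three call shapes (informal; names hypothetical):
    #
    #   rl.heads()                # all childless nodes
    #   rl.heads(start=n)         # only heads descending from node n
    #   rl.heads(stop=[s1, s2])   # s1/s2 treated as if they had no children
    #
    # The no-argument form is the fast path: it maps self.headrevs()
    # straight to nodes without walking a DAG subset.
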
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

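    # Why the `a > b` early return above is safe (informal note): revision
    # numbers are assigned in topological order, so an ancestor always has a
    # revision number lower than or equal to its descendant's. A revision
    # strictly greater than `b` can therefore never be an ancestor of `b`,
    # and the commonancestorsheads() computation can be skipped entirely.
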
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

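    # Note on the tie-break above (informal): when several equally "best"
    # common ancestors exist (e.g. after criss-cross merges), taking min()
    # over the binary node ids picks an arbitrary but deterministic winner,
    # so the same merge base is chosen on every machine.
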
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or a prefix of the hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

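    # Forms accepted by lookup() (illustrative; `rl` and `node` are
    # hypothetical):
    #
    #   rl.lookup(0)              # revision number
    #   rl.lookup('0')            # str(revision number)
    #   rl.lookup(node)           # 20-byte binary node
    #   rl.lookup(hex(node))      # 40-character hex node
    #   rl.lookup(hex(node)[:8])  # unambiguous hex prefix
    #
    # Ambiguous prefixes raise AmbiguousPrefixLookupError; anything else
    # that fails to resolve raises LookupError.
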
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

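    # Example behavior (a sketch; `rl` and the hashes are hypothetical): if
    # exactly one stored node hashes to 'a1b2...' and no other node shares
    # the prefix 'a', then rl.shortest(thatnode) returns 'a'. The extra
    # disambiguate() pass only lengthens prefixes made entirely of 'f'
    # characters, which could otherwise be mistaken for wdirid.
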
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

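    # Worked example of the window arithmetic above (a sketch, assuming a
    # chunk cache size of 64KiB, i.e. cachesize == 65536):
    #
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so one aligned 64KiB window [65536, 131072) is read and cached, and
    # the requested 100 bytes are sliced out of it with util.buffer.
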
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

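    # Note on the inline adjustment above (informal): in an inline revlog
    # the index entries and revision data are interleaved in a single file,
    # so the data for revision `rev` sits after (rev + 1) index entries of
    # self._io.size bytes each; the start/end offsets derived from the
    # index must be shifted by that amount before reading.
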
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

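    # Sketch of the two storage modes (informal): with generaldelta, the
    # base field of an index entry names the delta parent directly, so a
    # delta may be stored against any earlier revision (typically p1).
    # Without generaldelta, deltas always chain against the previous
    # revision:
    #
    #   rl.deltaparent(rev)  # generaldelta: index[rev][3]
    #   rl.deltaparent(rev)  # otherwise:    rev - 1 (or nullrev when
    #                        # base == rev, i.e. a full text is stored)
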
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

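    # Informal picture of a snapshot chain: a full text stored against
    # nullrev is a level-0 snapshot; a delta whose base is not one of the
    # revision's parents but is itself a snapshot is an intermediate
    # snapshot. snapshotdepth() is then the number of snapshots below a
    # revision in its delta chain:
    #
    #   full -> intermediate -> intermediate   # depths 0, 1, 2
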
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

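    # Sketch of a flag processor (hypothetical callables; the registry maps
    # flag -> (readtransform, writetransform, rawtransform), each returning
    # the transformed text plus a validatehash bool, except the raw
    # transform which returns only the bool):
    #
    #   def _readxform(rl, text):
    #       return decode(text), True    # hypothetical decode()
    #   def _writexform(rl, text):
    #       return encode(text), True    # hypothetical encode()
    #   def _rawxform(rl, text):
    #       return False                 # rawtext hash is not checkable
    #
    # Such a tuple is installed for a flag like REVIDX_EXTSTORED via the
    # module-level addflagprocessor() helper. Reads apply processors in
    # REVIDX_FLAGS_ORDER; writes apply them in reverse, so that
    # read(write(text)) round-trips even when transforms do not commute.
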
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')
1819
1819
1820 # If the flag processor modifies the revision data, ignore any provided
1820 # If the flag processor modifies the revision data, ignore any provided
1821 # cachedelta.
1821 # cachedelta.
1822 if rawtext != text:
1822 if rawtext != text:
1823 cachedelta = None
1823 cachedelta = None
1824
1824
1825 if len(rawtext) > _maxentrysize:
1825 if len(rawtext) > _maxentrysize:
1826 raise error.RevlogError(
1826 raise error.RevlogError(
1827 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1827 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1828 % (self.indexfile, len(rawtext)))
1828 % (self.indexfile, len(rawtext)))
1829
1829
1830 node = node or self.hash(rawtext, p1, p2)
1830 node = node or self.hash(rawtext, p1, p2)
1831 if node in self.nodemap:
1831 if node in self.nodemap:
1832 return node
1832 return node
1833
1833
1834 if validatehash:
1834 if validatehash:
1835 self.checkhash(rawtext, node, p1=p1, p2=p2)
1835 self.checkhash(rawtext, node, p1=p1, p2=p2)
1836
1836
1837 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1837 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1838 flags, cachedelta=cachedelta,
1838 flags, cachedelta=cachedelta,
1839 deltacomputer=deltacomputer)
1839 deltacomputer=deltacomputer)
1840
1840
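    # Illustrative sketch (added commentary, not part of the original source):
    # a typical caller adds a revision inside an open transaction. The names
    # `rl`, `tr`, `text`, `linkrev`, `p1node` and `p2node` are hypothetical
    # placeholders for values real calling code (e.g. filelog machinery)
    # would hold:
    #
    #   node = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #   assert node in rl.nodemap  # the revision is now addressable
    #
    # Passing one shared `deltacomputer` across many addrevision() calls
    # avoids rebuilding delta-search state for every revision.
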
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

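    # Sketch of the compress() contract (added commentary, not original
    # code): the method returns a (header, data) pair that callers simply
    # concatenate. With the default zlib engine, roughly:
    #
    #   rl.compress(b'')            -> ('', b'')       # empty stays empty
    #   rl.compress(compressible)   -> ('', b'x...')   # engine header is
    #                                                  # inside the data
    #   rl.compress(incompressible) -> ('u', raw)      # 'u' marks raw storage
    #   rl.compress(b'\0...')       -> ('', b'\0...')  # NUL-leading raw data
    #                                                  # needs no marker
    #
    # decompress() below routes on exactly that leading byte.
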
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

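    # Added illustration (not original code): compress() and decompress()
    # round-trip through the header byte described above. Assuming a revlog
    # instance `rl` and a bytes chunk:
    #
    #   header, payload = rl.compress(chunk)
    #   assert bytes(rl.decompress(header + payload)) == chunk
    #
    # A first byte of 'x' means zlib, '\0' means raw data needing no
    # unwrapping, 'u' means raw data stored behind a one-byte marker, and
    # any other byte is resolved through util.compengines (e.g. zstd).
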
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a default deltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

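    # Commentary added for clarity (not part of the original source): the
    # 8-tuple appended to self.index above packs, in order:
    #
    #   (offset_type(offset, flags),  # data offset fused with flag bits
    #    deltainfo.deltalen,          # on-disk (compressed delta) length
    #    textlen,                     # uncompressed full-text length
    #    deltainfo.base,              # rev the stored delta applies against
    #    link,                        # linkrev into the changelog
    #    p1r, p2r,                    # parent revision numbers
    #    node)                        # the revision's node id
    #
    # _io.packentry() serializes exactly this shape into an index record.
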
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

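    # Added sketch (not original code): each item consumed by addgroup() is
    # a 7-tuple, matching the unpacking inside the loop above:
    #
    #   (node,       # node id of the revision being added
    #    p1, p2,     # parent node ids
    #    linknode,   # changelog node this revision is linked to
    #    deltabase,  # node id the delta is diffed against
    #    delta,      # binary delta from deltabase
    #    flags)      # revision flags, 0 for the common case
    #
    # linkmapper translates linknode into a local changelog revision number.
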
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

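    # Clarifying sketch (commentary added, not original code): the truncation
    # offsets computed above differ by layout. For a split revlog the data
    # file is cut at the first stripped revision's data offset and the index
    # at a whole number of fixed-size records; an inline revlog keeps both in
    # one file, so the two are summed:
    #
    #   split:  datafile  -> self.start(rev)
    #           indexfile -> rev * self._io.size
    #   inline: indexfile -> self.start(rev) + rev * self._io.size
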
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

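    # Added note (not original code): the two return values measure on-disk
    # drift. Roughly, ``dd`` is how far the data file's size differs from
    # what the index says it should be, and ``di`` is how many bytes in the
    # index file are unaccounted for by complete records (plus inline data,
    # for inline revlogs). verifyintegrity() below reports them as "data
    # length off by N bytes" and "index contains N extra bytes"; (0, 0)
    # means the sizes are consistent.
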
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If None, the destination revlog's existing
        behavior is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

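    # Hedged usage sketch (added commentary; `srcrevlog`, `destrevlog` and
    # `tr` are hypothetical objects a caller such as repository-upgrade code
    # would already hold):
    #
    #   srcrevlog.clone(tr, destrevlog,
    #                   deltareuse=srcrevlog.DELTAREUSENEVER,
    #                   forcedeltabothparents=True)
    #
    # DELTAREUSENEVER forces every delta to be recomputed under the
    # destination's settings, and forcedeltabothparents=True additionally
    # makes merge revisions try deltas against both parents.
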
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex(self.version, None)

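    # Added note (not original code, and an assumption about the metadata
    # framing): storageutil.packmeta wraps the tombstone in the filelog
    # metadata envelope, so the stored raw text looks roughly like:
    #
    #   \x01\n
    #   censored: <tombstone text>
    #   \x01\n
    #
    # which is the shape storageutil.iscensoredtext() recognizes when the
    # censored revision is later read back.
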
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
@@ -1,897 +1,897 b''
1 # upgrade.py - functions for in place upgrade of Mercurial repository
1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 #
2 #
3 # Copyright (c) 2016-present, Gregory Szorc
3 # Copyright (c) 2016-present, Gregory Szorc
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import stat
10 import stat
11
11
12 from .i18n import _
12 from .i18n import _
13 from . import (
13 from . import (
14 changelog,
14 changelog,
15 error,
15 error,
16 filelog,
16 filelog,
17 hg,
17 hg,
18 localrepo,
18 localrepo,
19 manifest,
19 manifest,
20 pycompat,
20 pycompat,
21 revlog,
21 revlog,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 vfs as vfsmod,
24 vfs as vfsmod,
25 )
25 )
26
26
27 def requiredsourcerequirements(repo):
27 def requiredsourcerequirements(repo):
28 """Obtain requirements required to be present to upgrade a repo.
28 """Obtain requirements required to be present to upgrade a repo.
29
29
30 An upgrade will not be allowed if the repository doesn't have the
30 An upgrade will not be allowed if the repository doesn't have the
31 requirements returned by this function.
31 requirements returned by this function.
32 """
32 """
33 return {
33 return {
34 # Introduced in Mercurial 0.9.2.
34 # Introduced in Mercurial 0.9.2.
35 'revlogv1',
35 'revlogv1',
36 # Introduced in Mercurial 0.9.2.
36 # Introduced in Mercurial 0.9.2.
37 'store',
37 'store',
38 }
38 }
39
39
40 def blocksourcerequirements(repo):
40 def blocksourcerequirements(repo):
41 """Obtain requirements that will prevent an upgrade from occurring.
41 """Obtain requirements that will prevent an upgrade from occurring.
42
42
43 An upgrade cannot be performed if the source repository contains a
43 An upgrade cannot be performed if the source repository contains a
44 requirement in the returned set.
44 requirement in the returned set.
45 """
45 """
46 return {
46 return {
47 # The upgrade code does not yet support these experimental features.
47 # The upgrade code does not yet support these experimental features.
48 # This is an artificial limitation.
48 # This is an artificial limitation.
49 'treemanifest',
49 'treemanifest',
50 # This was a precursor to generaldelta and was never enabled by default.
50 # This was a precursor to generaldelta and was never enabled by default.
51 # It should (hopefully) not exist in the wild.
51 # It should (hopefully) not exist in the wild.
52 'parentdelta',
52 'parentdelta',
53 # Upgrade should operate on the actual store, not the shared link.
53 # Upgrade should operate on the actual store, not the shared link.
54 'shared',
54 'shared',
55 }
55 }
56
56
57 def supportremovedrequirements(repo):
57 def supportremovedrequirements(repo):
58 """Obtain requirements that can be removed during an upgrade.
58 """Obtain requirements that can be removed during an upgrade.
59
59
60 If an upgrade were to create a repository that dropped a requirement,
60 If an upgrade were to create a repository that dropped a requirement,
61 the dropped requirement must appear in the returned set for the upgrade
61 the dropped requirement must appear in the returned set for the upgrade
62 to be allowed.
62 to be allowed.
63 """
63 """
64 return {
64 return {
65 localrepo.SPARSEREVLOG_REQUIREMENT,
65 localrepo.SPARSEREVLOG_REQUIREMENT,
66 }
66 }
67
67
68 def supporteddestrequirements(repo):
68 def supporteddestrequirements(repo):
69 """Obtain requirements that upgrade supports in the destination.
69 """Obtain requirements that upgrade supports in the destination.
70
70
71 If the result of the upgrade would create requirements not in this set,
71 If the result of the upgrade would create requirements not in this set,
72 the upgrade is disallowed.
72 the upgrade is disallowed.
73
73
74 Extensions should monkeypatch this to add their custom requirements.
74 Extensions should monkeypatch this to add their custom requirements.
75 """
75 """
76 return {
76 return {
77 'dotencode',
77 'dotencode',
78 'fncache',
78 'fncache',
79 'generaldelta',
79 'generaldelta',
80 'revlogv1',
80 'revlogv1',
81 'store',
81 'store',
82 localrepo.SPARSEREVLOG_REQUIREMENT,
82 localrepo.SPARSEREVLOG_REQUIREMENT,
83 }
83 }
84
84
85 def allowednewrequirements(repo):
85 def allowednewrequirements(repo):
86 """Obtain requirements that can be added to a repository during upgrade.
86 """Obtain requirements that can be added to a repository during upgrade.
87
87
88 This is used to disallow proposed requirements from being added when
88 This is used to disallow proposed requirements from being added when
89 they weren't present before.
89 they weren't present before.
90
90
91 We use a list of allowed requirement additions instead of a list of known
91 We use a list of allowed requirement additions instead of a list of known
92 bad additions because the whitelist approach is safer and will prevent
92 bad additions because the whitelist approach is safer and will prevent
93 future, unknown requirements from accidentally being added.
93 future, unknown requirements from accidentally being added.
94 """
94 """
95 return {
95 return {
96 'dotencode',
96 'dotencode',
97 'fncache',
97 'fncache',
98 'generaldelta',
98 'generaldelta',
99 localrepo.SPARSEREVLOG_REQUIREMENT,
99 localrepo.SPARSEREVLOG_REQUIREMENT,
100 }
100 }
101
101
102 def preservedrequirements(repo):
102 def preservedrequirements(repo):
103 return set()
103 return set()
104
104
105 deficiency = 'deficiency'
105 deficiency = 'deficiency'
106 optimisation = 'optimization'
106 optimisation = 'optimization'
107
107
108 class improvement(object):
108 class improvement(object):
109 """Represents an improvement that can be made as part of an upgrade.
109 """Represents an improvement that can be made as part of an upgrade.
110
110
111 The following attributes are defined on each instance:
111 The following attributes are defined on each instance:
112
112
113 name
113 name
114 Machine-readable string uniquely identifying this improvement. It
114 Machine-readable string uniquely identifying this improvement. It
115 will be mapped to an action later in the upgrade process.
115 will be mapped to an action later in the upgrade process.
116
116
117 type
117 type
118 Either ``deficiency`` or ``optimisation``. A deficiency is an obvious
118 Either ``deficiency`` or ``optimisation``. A deficiency is an obvious
119 problem. An optimization is an action (sometimes optional) that
119 problem. An optimization is an action (sometimes optional) that
120 can be taken to further improve the state of the repository.
120 can be taken to further improve the state of the repository.
121
121
122 description
122 description
123 Message intended for humans explaining the improvement in more detail,
123 Message intended for humans explaining the improvement in more detail,
124 including the implications of it. For ``deficiency`` types, should be
124 including the implications of it. For ``deficiency`` types, should be
125 worded in the present tense. For ``optimisation`` types, should be
125 worded in the present tense. For ``optimisation`` types, should be
126 worded in the future tense.
126 worded in the future tense.
127
127
128 upgrademessage
128 upgrademessage
129 Message intended for humans explaining what an upgrade addressing this
129 Message intended for humans explaining what an upgrade addressing this
130 issue will do. Should be worded in the future tense.
130 issue will do. Should be worded in the future tense.
131 """
131 """
132 def __init__(self, name, type, description, upgrademessage):
132 def __init__(self, name, type, description, upgrademessage):
133 self.name = name
133 self.name = name
134 self.type = type
134 self.type = type
135 self.description = description
135 self.description = description
136 self.upgrademessage = upgrademessage
136 self.upgrademessage = upgrademessage
137
137
138 def __eq__(self, other):
138 def __eq__(self, other):
139 if not isinstance(other, improvement):
139 if not isinstance(other, improvement):
140 # This is what Python tells us to do
140 # This is what Python tells us to do
141 return NotImplemented
141 return NotImplemented
142 return self.name == other.name
142 return self.name == other.name
143
143
144 def __ne__(self, other):
144 def __ne__(self, other):
145 return not self == other
145 return not self == other
146
146
147 def __hash__(self):
147 def __hash__(self):
148 return hash(self.name)
148 return hash(self.name)
149
149
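
To make the attribute contract above concrete, here is a hedged sketch of constructing an improvement directly; the name 'examplefeature' is purely illustrative:

    demo = improvement(
        name='examplefeature',                      # machine-readable id
        type=optimisation,
        description='storage layout is suboptimal',          # present tense
        upgrademessage='storage layout will be rewritten')   # future tense

    # identity is defined by name alone:
    assert demo == improvement('examplefeature', deficiency, '', '')
    assert hash(demo) == hash('examplefeature')
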
150 allformatvariant = []
150 allformatvariant = []
151
151
152 def registerformatvariant(cls):
152 def registerformatvariant(cls):
153 allformatvariant.append(cls)
153 allformatvariant.append(cls)
154 return cls
154 return cls
155
155
156 class formatvariant(improvement):
156 class formatvariant(improvement):
157 """an improvement subclass dedicated to repository format"""
157 """an improvement subclass dedicated to repository format"""
158 type = deficiency
158 type = deficiency
159 ### The following attributes should be defined for each class:
159 ### The following attributes should be defined for each class:
160
160
161 # machine-readable string uniquely identifying this improvement. it will be
161 # machine-readable string uniquely identifying this improvement. it will be
162 # mapped to an action later in the upgrade process.
162 # mapped to an action later in the upgrade process.
163 name = None
163 name = None
164
164
165 # message intended for humans explaining the improvement in more detail,
165 # message intended for humans explaining the improvement in more detail,
166 # including the implications of it. For ``deficiency`` types, should be worded
166 # including the implications of it. For ``deficiency`` types, should be worded
167 # in the present tense.
167 # in the present tense.
168 description = None
168 description = None
169
169
170 # message intended for humans explaining what an upgrade addressing this
170 # message intended for humans explaining what an upgrade addressing this
171 # issue will do. should be worded in the future tense.
171 # issue will do. should be worded in the future tense.
172 upgrademessage = None
172 upgrademessage = None
173
173
174 # value of current Mercurial default for new repository
174 # value of current Mercurial default for new repository
175 default = None
175 default = None
176
176
177 def __init__(self):
177 def __init__(self):
178 raise NotImplementedError()
178 raise NotImplementedError()
179
179
180 @staticmethod
180 @staticmethod
181 def fromrepo(repo):
181 def fromrepo(repo):
182 """current value of the variant in the repository"""
182 """current value of the variant in the repository"""
183 raise NotImplementedError()
183 raise NotImplementedError()
184
184
185 @staticmethod
185 @staticmethod
186 def fromconfig(repo):
186 def fromconfig(repo):
187 """current value of the variant in the configuration"""
187 """current value of the variant in the configuration"""
188 raise NotImplementedError()
188 raise NotImplementedError()
189
189
190 class requirementformatvariant(formatvariant):
190 class requirementformatvariant(formatvariant):
191 """formatvariant based on a 'requirement' name.
191 """formatvariant based on a 'requirement' name.
192
192
193 Many format variants are controlled by a 'requirement'. We define a small
193 Many format variants are controlled by a 'requirement'. We define a small
194 subclass to factor the code.
194 subclass to factor the code.
195 """
195 """
196
196
197 # the requirement that controls this format variant
197 # the requirement that controls this format variant
198 _requirement = None
198 _requirement = None
199
199
200 @staticmethod
200 @staticmethod
201 def _newreporequirements(ui):
201 def _newreporequirements(ui):
202 return localrepo.newreporequirements(
202 return localrepo.newreporequirements(
203 ui, localrepo.defaultcreateopts(ui))
203 ui, localrepo.defaultcreateopts(ui))
204
204
205 @classmethod
205 @classmethod
206 def fromrepo(cls, repo):
206 def fromrepo(cls, repo):
207 assert cls._requirement is not None
207 assert cls._requirement is not None
208 return cls._requirement in repo.requirements
208 return cls._requirement in repo.requirements
209
209
210 @classmethod
210 @classmethod
211 def fromconfig(cls, repo):
211 def fromconfig(cls, repo):
212 assert cls._requirement is not None
212 assert cls._requirement is not None
213 return cls._requirement in cls._newreporequirements(repo.ui)
213 return cls._requirement in cls._newreporequirements(repo.ui)
214
214
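
As a sketch of how the decorator and base class combine, an extension could register its own requirement-backed variant alongside the built-in ones that follow; the 'exp-examplereq' requirement string and class name here are hypothetical:

    @registerformatvariant
    class examplevariant(requirementformatvariant):
        name = 'examplevariant'

        _requirement = 'exp-examplereq'   # hypothetical requirement

        default = False

        description = _('repository lacks the example storage feature')

        upgrademessage = _('repository will gain the example storage feature')
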
215 @registerformatvariant
215 @registerformatvariant
216 class fncache(requirementformatvariant):
216 class fncache(requirementformatvariant):
217 name = 'fncache'
217 name = 'fncache'
218
218
219 _requirement = 'fncache'
219 _requirement = 'fncache'
220
220
221 default = True
221 default = True
222
222
223 description = _('long and reserved filenames may not work correctly; '
223 description = _('long and reserved filenames may not work correctly; '
224 'repository performance is sub-optimal')
224 'repository performance is sub-optimal')
225
225
226 upgrademessage = _('repository will be more resilient to storing '
226 upgrademessage = _('repository will be more resilient to storing '
227 'certain paths and performance of certain '
227 'certain paths and performance of certain '
228 'operations should be improved')
228 'operations should be improved')
229
229
230 @registerformatvariant
230 @registerformatvariant
231 class dotencode(requirementformatvariant):
231 class dotencode(requirementformatvariant):
232 name = 'dotencode'
232 name = 'dotencode'
233
233
234 _requirement = 'dotencode'
234 _requirement = 'dotencode'
235
235
236 default = True
236 default = True
237
237
238 description = _('storage of filenames beginning with a period or '
238 description = _('storage of filenames beginning with a period or '
239 'space may not work correctly')
239 'space may not work correctly')
240
240
241 upgrademessage = _('repository will be better able to store files '
241 upgrademessage = _('repository will be better able to store files '
242 'beginning with a space or period')
242 'beginning with a space or period')
243
243
244 @registerformatvariant
244 @registerformatvariant
245 class generaldelta(requirementformatvariant):
245 class generaldelta(requirementformatvariant):
246 name = 'generaldelta'
246 name = 'generaldelta'
247
247
248 _requirement = 'generaldelta'
248 _requirement = 'generaldelta'
249
249
250 default = True
250 default = True
251
251
252 description = _('deltas within internal storage are unable to '
252 description = _('deltas within internal storage are unable to '
253 'choose optimal revisions; repository is larger and '
253 'choose optimal revisions; repository is larger and '
254 'slower than it could be; interaction with other '
254 'slower than it could be; interaction with other '
255 'repositories may require extra network and CPU '
255 'repositories may require extra network and CPU '
256 'resources, making "hg push" and "hg pull" slower')
256 'resources, making "hg push" and "hg pull" slower')
257
257
258 upgrademessage = _('repository storage will be able to create '
258 upgrademessage = _('repository storage will be able to create '
259 'optimal deltas; new repository data will be '
259 'optimal deltas; new repository data will be '
260 'smaller and read times should decrease; '
260 'smaller and read times should decrease; '
261 'interacting with other repositories using this '
261 'interacting with other repositories using this '
262 'storage model should require less network and '
262 'storage model should require less network and '
263 'CPU resources, making "hg push" and "hg pull" '
263 'CPU resources, making "hg push" and "hg pull" '
264 'faster')
264 'faster')
265
265
266 @registerformatvariant
266 @registerformatvariant
267 class sparserevlog(requirementformatvariant):
267 class sparserevlog(requirementformatvariant):
268 name = 'sparserevlog'
268 name = 'sparserevlog'
269
269
270 _requirement = localrepo.SPARSEREVLOG_REQUIREMENT
270 _requirement = localrepo.SPARSEREVLOG_REQUIREMENT
271
271
272 default = False
272 default = False
273
273
274 description = _('in order to limit disk reading and memory usage on older '
274 description = _('in order to limit disk reading and memory usage on older '
275 'versions, the span of a delta chain from its root to its '
275 'versions, the span of a delta chain from its root to its '
276 'end is limited, regardless of the relevant data in this span. '
276 'end is limited, regardless of the relevant data in this span. '
277 'This can severely limit Mercurial\'s ability to build good '
277 'This can severely limit Mercurial\'s ability to build good '
278 'delta chains, resulting in much more storage space being '
278 'delta chains, resulting in much more storage space being '
279 'used and limiting the reusability of on-disk deltas during '
279 'used and limiting the reusability of on-disk deltas during '
280 'exchange.'
280 'exchange.'
281 )
281 )
282
282
283 upgrademessage = _('Revlog supports delta chains with more unused data '
283 upgrademessage = _('Revlog supports delta chains with more unused data '
284 'between payloads. These gaps will be skipped at read '
284 'between payloads. These gaps will be skipped at read '
285 'time. This allows for better delta chains, making for '
285 'time. This allows for better delta chains, making for '
286 'better compression and faster exchange with the server.')
286 'better compression and faster exchange with the server.')
287
287
288 @registerformatvariant
288 @registerformatvariant
289 class removecldeltachain(formatvariant):
289 class removecldeltachain(formatvariant):
290 name = 'plain-cl-delta'
290 name = 'plain-cl-delta'
291
291
292 default = True
292 default = True
293
293
294 description = _('changelog storage is using deltas instead of '
294 description = _('changelog storage is using deltas instead of '
295 'raw entries; changelog reading and any '
295 'raw entries; changelog reading and any '
296 'operation relying on changelog data are slower '
296 'operation relying on changelog data are slower '
297 'than they could be')
297 'than they could be')
298
298
299 upgrademessage = _('changelog storage will be reformatted to '
299 upgrademessage = _('changelog storage will be reformatted to '
300 'store raw entries; changelog reading will be '
300 'store raw entries; changelog reading will be '
301 'faster; changelog size may be reduced')
301 'faster; changelog size may be reduced')
302
302
303 @staticmethod
303 @staticmethod
304 def fromrepo(repo):
304 def fromrepo(repo):
305 # Mercurial 4.0 changed changelogs to not use delta chains. Search for
305 # Mercurial 4.0 changed changelogs to not use delta chains. Search for
306 # changelogs with deltas.
306 # changelogs with deltas.
307 cl = repo.changelog
307 cl = repo.changelog
308 chainbase = cl.chainbase
308 chainbase = cl.chainbase
309 return all(rev == chainbase(rev) for rev in cl)
309 return all(rev == chainbase(rev) for rev in cl)
310
310
311 @staticmethod
311 @staticmethod
312 def fromconfig(repo):
312 def fromconfig(repo):
313 return True
313 return True
314
314
315 @registerformatvariant
315 @registerformatvariant
316 class compressionengine(formatvariant):
316 class compressionengine(formatvariant):
317 name = 'compression'
317 name = 'compression'
318 default = 'zlib'
318 default = 'zlib'
319
319
320 description = _('Compression algorithm used to compress data. '
320 description = _('Compression algorithm used to compress data. '
321 'Some engines are faster than others')
321 'Some engines are faster than others')
322
322
323 upgrademessage = _('revlog content will be recompressed with the new '
323 upgrademessage = _('revlog content will be recompressed with the new '
324 'algorithm.')
324 'algorithm.')
325
325
326 @classmethod
326 @classmethod
327 def fromrepo(cls, repo):
327 def fromrepo(cls, repo):
328 for req in repo.requirements:
328 for req in repo.requirements:
329 if req.startswith('exp-compression-'):
329 if req.startswith('exp-compression-'):
330 return req.split('-', 2)[2]
330 return req.split('-', 2)[2]
331 return 'zlib'
331 return 'zlib'
332
332
333 @classmethod
333 @classmethod
334 def fromconfig(cls, repo):
334 def fromconfig(cls, repo):
335 return repo.ui.config('experimental', 'format.compression')
335 return repo.ui.config('experimental', 'format.compression')
336
336
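
The 'exp-compression-' parsing above relies on str.split with a maxsplit of 2; a quick illustration with a hypothetical requirement string:

    req = 'exp-compression-zstd'
    # -> ['exp', 'compression', 'zstd']; the engine name is the third field
    assert req.split('-', 2)[2] == 'zstd'
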
337 def finddeficiencies(repo):
337 def finddeficiencies(repo):
338 """returns a list of deficiencies that the repo suffer from"""
338 """returns a list of deficiencies that the repo suffer from"""
339 deficiencies = []
339 deficiencies = []
340
340
341 # We could detect lack of revlogv1 and store here, but they were added
341 # We could detect lack of revlogv1 and store here, but they were added
342 # in 0.9.2 and we don't support upgrading repos without these
342 # in 0.9.2 and we don't support upgrading repos without these
343 # requirements, so let's not bother.
343 # requirements, so let's not bother.
344
344
345 for fv in allformatvariant:
345 for fv in allformatvariant:
346 if not fv.fromrepo(repo):
346 if not fv.fromrepo(repo):
347 deficiencies.append(fv)
347 deficiencies.append(fv)
348
348
349 return deficiencies
349 return deficiencies
350
350
351 def findoptimizations(repo):
351 def findoptimizations(repo):
352 """Determine optimisation that could be used during upgrade"""
352 """Determine optimisation that could be used during upgrade"""
353 # These are unconditionally added. There is logic later that figures out
353 # These are unconditionally added. There is logic later that figures out
354 # which ones to apply.
354 # which ones to apply.
355 optimizations = []
355 optimizations = []
356
356
357 optimizations.append(improvement(
357 optimizations.append(improvement(
358 name='redeltaparent',
358 name='redeltaparent',
359 type=optimisation,
359 type=optimisation,
360 description=_('deltas within internal storage will be recalculated to '
360 description=_('deltas within internal storage will be recalculated to '
361 'choose an optimal base revision where this was not '
361 'choose an optimal base revision where this was not '
362 'already done; the size of the repository may shrink and '
362 'already done; the size of the repository may shrink and '
363 'various operations may become faster; the first time '
363 'various operations may become faster; the first time '
364 'this optimization is performed could slow down upgrade '
364 'this optimization is performed could slow down upgrade '
365 'execution considerably; subsequent invocations should '
365 'execution considerably; subsequent invocations should '
366 'not run noticeably slower'),
366 'not run noticeably slower'),
367 upgrademessage=_('deltas within internal storage will choose a new '
367 upgrademessage=_('deltas within internal storage will choose a new '
368 'base revision if needed')))
368 'base revision if needed')))
369
369
370 optimizations.append(improvement(
370 optimizations.append(improvement(
371 name='redeltamultibase',
371 name='redeltamultibase',
372 type=optimisation,
372 type=optimisation,
373 description=_('deltas within internal storage will be recalculated '
373 description=_('deltas within internal storage will be recalculated '
374 'against multiple base revisions and the smallest '
374 'against multiple base revisions and the smallest '
375 'difference will be used; the size of the repository may '
375 'difference will be used; the size of the repository may '
376 'shrink significantly when there are many merges; this '
376 'shrink significantly when there are many merges; this '
377 'optimization will slow down execution in proportion to '
377 'optimization will slow down execution in proportion to '
378 'the number of merges in the repository and the number '
378 'the number of merges in the repository and the number '
379 'of files in the repository; this slowdown should not '
379 'of files in the repository; this slowdown should not '
380 'be significant unless there are tens of thousands of '
380 'be significant unless there are tens of thousands of '
381 'files and thousands of merges'),
381 'files and thousands of merges'),
382 upgrademessage=_('deltas within internal storage will choose an '
382 upgrademessage=_('deltas within internal storage will choose an '
383 'optimal delta by computing deltas against multiple '
383 'optimal delta by computing deltas against multiple '
384 'parents; may slow down execution time '
384 'parents; may slow down execution time '
385 'significantly')))
385 'significantly')))
386
386
387 optimizations.append(improvement(
387 optimizations.append(improvement(
388 name='redeltaall',
388 name='redeltaall',
389 type=optimisation,
389 type=optimisation,
390 description=_('deltas within internal storage will always be '
390 description=_('deltas within internal storage will always be '
391 'recalculated without reusing prior deltas; this will '
391 'recalculated without reusing prior deltas; this will '
392 'likely make execution run several times slower; this '
392 'likely make execution run several times slower; this '
393 'optimization is typically not needed'),
393 'optimization is typically not needed'),
394 upgrademessage=_('deltas within internal storage will be fully '
394 upgrademessage=_('deltas within internal storage will be fully '
395 'recomputed; this will likely drastically slow down '
395 'recomputed; this will likely drastically slow down '
396 'execution time')))
396 'execution time')))
397
397
398 optimizations.append(improvement(
398 optimizations.append(improvement(
399 name='redeltafulladd',
399 name='redeltafulladd',
400 type=optimisation,
400 type=optimisation,
401 description=_('every revision will be re-added as if it were new '
401 description=_('every revision will be re-added as if it were new '
402 'content. It will go through the full storage '
402 'content. It will go through the full storage '
403 'mechanism giving extensions a chance to process it '
403 'mechanism giving extensions a chance to process it '
404 '(e.g. lfs). This is similar to "redeltaall" but even '
404 '(e.g. lfs). This is similar to "redeltaall" but even '
405 'slower since more logic is involved.'),
405 'slower since more logic is involved.'),
406 upgrademessage=_('each revision will be added as new content to the '
406 upgrademessage=_('each revision will be added as new content to the '
407 'internal storage; this will likely drastically slow '
407 'internal storage; this will likely drastically slow '
408 'down execution time, but some extensions might need '
408 'down execution time, but some extensions might need '
409 'it')))
409 'it')))
410
410
411 return optimizations
411 return optimizations
412
412
413 def determineactions(repo, deficiencies, sourcereqs, destreqs):
413 def determineactions(repo, deficiencies, sourcereqs, destreqs):
414 """Determine upgrade actions that will be performed.
414 """Determine upgrade actions that will be performed.
415
415
416 Given a list of improvements as returned by ``finddeficiencies`` and
416 Given a list of improvements as returned by ``finddeficiencies`` and
417 ``findoptimizations``, determine the list of upgrade actions that
417 ``findoptimizations``, determine the list of upgrade actions that
418 will be performed.
418 will be performed.
419
419
420 The role of this function is to filter improvements if needed, apply
420 The role of this function is to filter improvements if needed, apply
421 recommended optimizations from the improvements list that make sense,
421 recommended optimizations from the improvements list that make sense,
422 etc.
422 etc.
423
423
424 Returns a list of actions (improvement instances).
424 Returns a list of actions (improvement instances).
425 """
425 """
426 newactions = []
426 newactions = []
427
427
428 knownreqs = supporteddestrequirements(repo)
428 knownreqs = supporteddestrequirements(repo)
429
429
430 for d in deficiencies:
430 for d in deficiencies:
431 name = d.name
431 name = d.name
432
432
433 # If the action is a requirement that doesn't show up in the
433 # If the action is a requirement that doesn't show up in the
434 # destination requirements, prune the action.
434 # destination requirements, prune the action.
435 if name in knownreqs and name not in destreqs:
435 if name in knownreqs and name not in destreqs:
436 continue
436 continue
437
437
438 newactions.append(d)
438 newactions.append(d)
439
439
440 # FUTURE consider adding some optimizations here for certain transitions.
440 # FUTURE consider adding some optimizations here for certain transitions.
441 # e.g. adding generaldelta could schedule parent redeltas.
441 # e.g. adding generaldelta could schedule parent redeltas.
442
442
443 return newactions
443 return newactions
444
444
445 def _revlogfrompath(repo, path):
445 def _revlogfrompath(repo, path):
446 """Obtain a revlog from a repo path.
446 """Obtain a revlog from a repo path.
447
447
448 An instance of the appropriate class is returned.
448 An instance of the appropriate class is returned.
449 """
449 """
450 if path == '00changelog.i':
450 if path == '00changelog.i':
451 return changelog.changelog(repo.svfs)
451 return changelog.changelog(repo.svfs)
452 elif path.endswith('00manifest.i'):
452 elif path.endswith('00manifest.i'):
453 mandir = path[:-len('00manifest.i')]
453 mandir = path[:-len('00manifest.i')]
454 return manifest.manifestrevlog(repo.svfs, tree=mandir)
454 return manifest.manifestrevlog(repo.svfs, tree=mandir)
455 else:
455 else:
456 # reverse of "/".join(("data", path + ".i"))
456 # reverse of "/".join(("data", path + ".i"))
457 return filelog.filelog(repo.svfs, path[5:-2])
457 return filelog.filelog(repo.svfs, path[5:-2])
458
458
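
The path[5:-2] slice above undoes the 'data/' prefix and '.i' suffix; a quick check with a hypothetical store path:

    path = 'data/foo.py.i'
    assert path[:5] == 'data/' and path[-2:] == '.i'
    assert path[5:-2] == 'foo.py'   # the tracked file name
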
459 def _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse, deltabothparents):
459 def _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse, forcedeltabothparents):
460 """Copy revlogs between 2 repos."""
460 """Copy revlogs between 2 repos."""
461 revcount = 0
461 revcount = 0
462 srcsize = 0
462 srcsize = 0
463 srcrawsize = 0
463 srcrawsize = 0
464 dstsize = 0
464 dstsize = 0
465 fcount = 0
465 fcount = 0
466 frevcount = 0
466 frevcount = 0
467 fsrcsize = 0
467 fsrcsize = 0
468 frawsize = 0
468 frawsize = 0
469 fdstsize = 0
469 fdstsize = 0
470 mcount = 0
470 mcount = 0
471 mrevcount = 0
471 mrevcount = 0
472 msrcsize = 0
472 msrcsize = 0
473 mrawsize = 0
473 mrawsize = 0
474 mdstsize = 0
474 mdstsize = 0
475 crevcount = 0
475 crevcount = 0
476 csrcsize = 0
476 csrcsize = 0
477 crawsize = 0
477 crawsize = 0
478 cdstsize = 0
478 cdstsize = 0
479
479
480 # Perform a pass to collect metadata. This validates we can open all
480 # Perform a pass to collect metadata. This validates we can open all
481 # source files and allows a unified progress bar to be displayed.
481 # source files and allows a unified progress bar to be displayed.
482 for unencoded, encoded, size in srcrepo.store.walk():
482 for unencoded, encoded, size in srcrepo.store.walk():
483 if unencoded.endswith('.d'):
483 if unencoded.endswith('.d'):
484 continue
484 continue
485
485
486 rl = _revlogfrompath(srcrepo, unencoded)
486 rl = _revlogfrompath(srcrepo, unencoded)
487
487
488 info = rl.storageinfo(exclusivefiles=True, revisionscount=True,
488 info = rl.storageinfo(exclusivefiles=True, revisionscount=True,
489 trackedsize=True, storedsize=True)
489 trackedsize=True, storedsize=True)
490
490
491 revcount += info['revisionscount'] or 0
491 revcount += info['revisionscount'] or 0
492 datasize = info['storedsize'] or 0
492 datasize = info['storedsize'] or 0
493 rawsize = info['trackedsize'] or 0
493 rawsize = info['trackedsize'] or 0
494
494
495 srcsize += datasize
495 srcsize += datasize
496 srcrawsize += rawsize
496 srcrawsize += rawsize
497
497
498 # This is for the separate progress bars.
498 # This is for the separate progress bars.
499 if isinstance(rl, changelog.changelog):
499 if isinstance(rl, changelog.changelog):
500 crevcount += len(rl)
500 crevcount += len(rl)
501 csrcsize += datasize
501 csrcsize += datasize
502 crawsize += rawsize
502 crawsize += rawsize
503 elif isinstance(rl, manifest.manifestrevlog):
503 elif isinstance(rl, manifest.manifestrevlog):
504 mcount += 1
504 mcount += 1
505 mrevcount += len(rl)
505 mrevcount += len(rl)
506 msrcsize += datasize
506 msrcsize += datasize
507 mrawsize += rawsize
507 mrawsize += rawsize
508 elif isinstance(rl, filelog.filelog):
508 elif isinstance(rl, filelog.filelog):
509 fcount += 1
509 fcount += 1
510 frevcount += len(rl)
510 frevcount += len(rl)
511 fsrcsize += datasize
511 fsrcsize += datasize
512 frawsize += rawsize
512 frawsize += rawsize
513 else:
513 else:
514 raise error.ProgrammingError('unknown revlog type')
514 raise error.ProgrammingError('unknown revlog type')
515
515
516 if not revcount:
516 if not revcount:
517 return
517 return
518
518
519 ui.write(_('migrating %d total revisions (%d in filelogs, %d in manifests, '
519 ui.write(_('migrating %d total revisions (%d in filelogs, %d in manifests, '
520 '%d in changelog)\n') %
520 '%d in changelog)\n') %
521 (revcount, frevcount, mrevcount, crevcount))
521 (revcount, frevcount, mrevcount, crevcount))
522 ui.write(_('migrating %s in store; %s tracked data\n') % (
522 ui.write(_('migrating %s in store; %s tracked data\n') % (
523 (util.bytecount(srcsize), util.bytecount(srcrawsize))))
523 (util.bytecount(srcsize), util.bytecount(srcrawsize))))
524
524
525 # Used to keep track of progress.
525 # Used to keep track of progress.
526 progress = None
526 progress = None
527 def oncopiedrevision(rl, rev, node):
527 def oncopiedrevision(rl, rev, node):
528 progress.increment()
528 progress.increment()
529
529
530 # Do the actual copying.
530 # Do the actual copying.
531 # FUTURE this operation can be farmed off to worker processes.
531 # FUTURE this operation can be farmed off to worker processes.
532 seen = set()
532 seen = set()
533 for unencoded, encoded, size in srcrepo.store.walk():
533 for unencoded, encoded, size in srcrepo.store.walk():
534 if unencoded.endswith('.d'):
534 if unencoded.endswith('.d'):
535 continue
535 continue
536
536
537 oldrl = _revlogfrompath(srcrepo, unencoded)
537 oldrl = _revlogfrompath(srcrepo, unencoded)
538 newrl = _revlogfrompath(dstrepo, unencoded)
538 newrl = _revlogfrompath(dstrepo, unencoded)
539
539
540 if isinstance(oldrl, changelog.changelog) and 'c' not in seen:
540 if isinstance(oldrl, changelog.changelog) and 'c' not in seen:
541 ui.write(_('finished migrating %d manifest revisions across %d '
541 ui.write(_('finished migrating %d manifest revisions across %d '
542 'manifests; change in size: %s\n') %
542 'manifests; change in size: %s\n') %
543 (mrevcount, mcount, util.bytecount(mdstsize - msrcsize)))
543 (mrevcount, mcount, util.bytecount(mdstsize - msrcsize)))
544
544
545 ui.write(_('migrating changelog containing %d revisions '
545 ui.write(_('migrating changelog containing %d revisions '
546 '(%s in store; %s tracked data)\n') %
546 '(%s in store; %s tracked data)\n') %
547 (crevcount, util.bytecount(csrcsize),
547 (crevcount, util.bytecount(csrcsize),
548 util.bytecount(crawsize)))
548 util.bytecount(crawsize)))
549 seen.add('c')
549 seen.add('c')
550 progress = srcrepo.ui.makeprogress(_('changelog revisions'),
550 progress = srcrepo.ui.makeprogress(_('changelog revisions'),
551 total=crevcount)
551 total=crevcount)
552 elif isinstance(oldrl, manifest.manifestrevlog) and 'm' not in seen:
552 elif isinstance(oldrl, manifest.manifestrevlog) and 'm' not in seen:
553 ui.write(_('finished migrating %d filelog revisions across %d '
553 ui.write(_('finished migrating %d filelog revisions across %d '
554 'filelogs; change in size: %s\n') %
554 'filelogs; change in size: %s\n') %
555 (frevcount, fcount, util.bytecount(fdstsize - fsrcsize)))
555 (frevcount, fcount, util.bytecount(fdstsize - fsrcsize)))
556
556
557 ui.write(_('migrating %d manifests containing %d revisions '
557 ui.write(_('migrating %d manifests containing %d revisions '
558 '(%s in store; %s tracked data)\n') %
558 '(%s in store; %s tracked data)\n') %
559 (mcount, mrevcount, util.bytecount(msrcsize),
559 (mcount, mrevcount, util.bytecount(msrcsize),
560 util.bytecount(mrawsize)))
560 util.bytecount(mrawsize)))
561 seen.add('m')
561 seen.add('m')
562 if progress:
562 if progress:
563 progress.complete()
563 progress.complete()
564 progress = srcrepo.ui.makeprogress(_('manifest revisions'),
564 progress = srcrepo.ui.makeprogress(_('manifest revisions'),
565 total=mrevcount)
565 total=mrevcount)
566 elif 'f' not in seen:
566 elif 'f' not in seen:
567 ui.write(_('migrating %d filelogs containing %d revisions '
567 ui.write(_('migrating %d filelogs containing %d revisions '
568 '(%s in store; %s tracked data)\n') %
568 '(%s in store; %s tracked data)\n') %
569 (fcount, frevcount, util.bytecount(fsrcsize),
569 (fcount, frevcount, util.bytecount(fsrcsize),
570 util.bytecount(frawsize)))
570 util.bytecount(frawsize)))
571 seen.add('f')
571 seen.add('f')
572 if progress:
572 if progress:
573 progress.complete()
573 progress.complete()
574 progress = srcrepo.ui.makeprogress(_('file revisions'),
574 progress = srcrepo.ui.makeprogress(_('file revisions'),
575 total=frevcount)
575 total=frevcount)
576
576
577
577
578 ui.note(_('cloning %d revisions from %s\n') % (len(oldrl), unencoded))
578 ui.note(_('cloning %d revisions from %s\n') % (len(oldrl), unencoded))
579 oldrl.clone(tr, newrl, addrevisioncb=oncopiedrevision,
579 oldrl.clone(tr, newrl, addrevisioncb=oncopiedrevision,
580 deltareuse=deltareuse,
580 deltareuse=deltareuse,
581 deltabothparents=deltabothparents)
581 forcedeltabothparents=forcedeltabothparents)
582
582
583 info = newrl.storageinfo(storedsize=True)
583 info = newrl.storageinfo(storedsize=True)
584 datasize = info['storedsize'] or 0
584 datasize = info['storedsize'] or 0
585
585
586 dstsize += datasize
586 dstsize += datasize
587
587
588 if isinstance(newrl, changelog.changelog):
588 if isinstance(newrl, changelog.changelog):
589 cdstsize += datasize
589 cdstsize += datasize
590 elif isinstance(newrl, manifest.manifestrevlog):
590 elif isinstance(newrl, manifest.manifestrevlog):
591 mdstsize += datasize
591 mdstsize += datasize
592 else:
592 else:
593 fdstsize += datasize
593 fdstsize += datasize
594
594
595 progress.complete()
595 progress.complete()
596
596
597 ui.write(_('finished migrating %d changelog revisions; change in size: '
597 ui.write(_('finished migrating %d changelog revisions; change in size: '
598 '%s\n') % (crevcount, util.bytecount(cdstsize - csrcsize)))
598 '%s\n') % (crevcount, util.bytecount(cdstsize - csrcsize)))
599
599
600 ui.write(_('finished migrating %d total revisions; total change in store '
600 ui.write(_('finished migrating %d total revisions; total change in store '
601 'size: %s\n') % (revcount, util.bytecount(dstsize - srcsize)))
601 'size: %s\n') % (revcount, util.bytecount(dstsize - srcsize)))
602
602
603 def _filterstorefile(srcrepo, dstrepo, requirements, path, mode, st):
603 def _filterstorefile(srcrepo, dstrepo, requirements, path, mode, st):
604 """Determine whether to copy a store file during upgrade.
604 """Determine whether to copy a store file during upgrade.
605
605
606 This function is called when migrating store files from ``srcrepo`` to
606 This function is called when migrating store files from ``srcrepo`` to
607 ``dstrepo`` as part of upgrading a repository.
607 ``dstrepo`` as part of upgrading a repository.
608
608
609 Args:
609 Args:
610 srcrepo: repo we are copying from
610 srcrepo: repo we are copying from
611 dstrepo: repo we are copying to
611 dstrepo: repo we are copying to
612 requirements: set of requirements for ``dstrepo``
612 requirements: set of requirements for ``dstrepo``
613 path: store file being examined
613 path: store file being examined
614 mode: the ``ST_MODE`` file type of ``path``
614 mode: the ``ST_MODE`` file type of ``path``
615 st: ``stat`` data structure for ``path``
615 st: ``stat`` data structure for ``path``
616
616
617 Function should return ``True`` if the file is to be copied.
617 Function should return ``True`` if the file is to be copied.
618 """
618 """
619 # Skip revlogs.
619 # Skip revlogs.
620 if path.endswith(('.i', '.d')):
620 if path.endswith(('.i', '.d')):
621 return False
621 return False
622 # Skip transaction related files.
622 # Skip transaction related files.
623 if path.startswith('undo'):
623 if path.startswith('undo'):
624 return False
624 return False
625 # Only copy regular files.
625 # Only copy regular files.
626 if mode != stat.S_IFREG:
626 if mode != stat.S_IFREG:
627 return False
627 return False
628 # Skip other skipped files.
628 # Skip other skipped files.
629 if path in ('lock', 'fncache'):
629 if path in ('lock', 'fncache'):
630 return False
630 return False
631
631
632 return True
632 return True
633
633
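
A sketch of what the filter above keeps and skips for a few representative store paths; inputs are hypothetical, None stands in for the unused srcrepo/dstrepo/st arguments, and `stat` is already imported at the top of this module:

    def _keep(p):   # hypothetical helper, illustration only
        return _filterstorefile(None, None, set(), p, stat.S_IFREG, None)

    assert _keep('phaseroots')            # ordinary store file: copied
    assert not _keep('data/x.i')          # revlog: migrated separately
    assert not _keep('undo.backupfiles')  # transaction file: skipped
    assert not _keep('fncache')           # in the explicit skip list
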
634 def _finishdatamigration(ui, srcrepo, dstrepo, requirements):
634 def _finishdatamigration(ui, srcrepo, dstrepo, requirements):
635 """Hook point for extensions to perform additional actions during upgrade.
635 """Hook point for extensions to perform additional actions during upgrade.
636
636
637 This function is called after revlogs and store files have been copied but
637 This function is called after revlogs and store files have been copied but
638 before the new store is swapped into the original location.
638 before the new store is swapped into the original location.
639 """
639 """
640
640
641 def _upgraderepo(ui, srcrepo, dstrepo, requirements, actions):
641 def _upgraderepo(ui, srcrepo, dstrepo, requirements, actions):
642 """Do the low-level work of upgrading a repository.
642 """Do the low-level work of upgrading a repository.
643
643
644 The upgrade is effectively performed as a copy between a source
644 The upgrade is effectively performed as a copy between a source
645 repository and a temporary destination repository.
645 repository and a temporary destination repository.
646
646
647 The source repository is unmodified for as long as possible so the
647 The source repository is unmodified for as long as possible so the
648 upgrade can abort at any time without causing loss of service for
648 upgrade can abort at any time without causing loss of service for
649 readers and without corrupting the source repository.
649 readers and without corrupting the source repository.
650 """
650 """
651 assert srcrepo.currentwlock()
651 assert srcrepo.currentwlock()
652 assert dstrepo.currentwlock()
652 assert dstrepo.currentwlock()
653
653
654 ui.write(_('(it is safe to interrupt this process any time before '
654 ui.write(_('(it is safe to interrupt this process any time before '
655 'data migration completes)\n'))
655 'data migration completes)\n'))
656
656
657 if 'redeltaall' in actions:
657 if 'redeltaall' in actions:
658 deltareuse = revlog.revlog.DELTAREUSENEVER
658 deltareuse = revlog.revlog.DELTAREUSENEVER
659 elif 'redeltaparent' in actions:
659 elif 'redeltaparent' in actions:
660 deltareuse = revlog.revlog.DELTAREUSESAMEREVS
660 deltareuse = revlog.revlog.DELTAREUSESAMEREVS
661 elif 'redeltamultibase' in actions:
661 elif 'redeltamultibase' in actions:
662 deltareuse = revlog.revlog.DELTAREUSESAMEREVS
662 deltareuse = revlog.revlog.DELTAREUSESAMEREVS
663 elif 'redeltafulladd' in actions:
663 elif 'redeltafulladd' in actions:
664 deltareuse = revlog.revlog.DELTAREUSEFULLADD
664 deltareuse = revlog.revlog.DELTAREUSEFULLADD
665 else:
665 else:
666 deltareuse = revlog.revlog.DELTAREUSEALWAYS
666 deltareuse = revlog.revlog.DELTAREUSEALWAYS
667
667
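# Sketch (not part of this changeset): the if/elif ladder above read as
# a precedence list, from the most to the least aggressive
# recomputation policy; first match wins, mirroring the elif ordering.
_POLICY_BY_ACTION = [
    ('redeltaall', revlog.revlog.DELTAREUSENEVER),
    ('redeltaparent', revlog.revlog.DELTAREUSESAMEREVS),
    ('redeltamultibase', revlog.revlog.DELTAREUSESAMEREVS),
    ('redeltafulladd', revlog.revlog.DELTAREUSEFULLADD),
]

def _pickdeltareuse(actions):
    for name, policy in _POLICY_BY_ACTION:
        if name in actions:
            return policy
    return revlog.revlog.DELTAREUSEALWAYS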
668 with dstrepo.transaction('upgrade') as tr:
668 with dstrepo.transaction('upgrade') as tr:
669 _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse,
669 _copyrevlogs(ui, srcrepo, dstrepo, tr, deltareuse,
670 'redeltamultibase' in actions)
670 'redeltamultibase' in actions)
671
671
672 # Now copy other files in the store directory.
672 # Now copy other files in the store directory.
673 # The sorted() makes execution deterministic.
673 # The sorted() makes execution deterministic.
674 for p, kind, st in sorted(srcrepo.store.vfs.readdir('', stat=True)):
674 for p, kind, st in sorted(srcrepo.store.vfs.readdir('', stat=True)):
675 if not _filterstorefile(srcrepo, dstrepo, requirements,
675 if not _filterstorefile(srcrepo, dstrepo, requirements,
676 p, kind, st):
676 p, kind, st):
677 continue
677 continue
678
678
679 srcrepo.ui.write(_('copying %s\n') % p)
679 srcrepo.ui.write(_('copying %s\n') % p)
680 src = srcrepo.store.rawvfs.join(p)
680 src = srcrepo.store.rawvfs.join(p)
681 dst = dstrepo.store.rawvfs.join(p)
681 dst = dstrepo.store.rawvfs.join(p)
682 util.copyfile(src, dst, copystat=True)
682 util.copyfile(src, dst, copystat=True)
683
683
684 _finishdatamigration(ui, srcrepo, dstrepo, requirements)
684 _finishdatamigration(ui, srcrepo, dstrepo, requirements)
685
685
686 ui.write(_('data fully migrated to temporary repository\n'))
686 ui.write(_('data fully migrated to temporary repository\n'))
687
687
688 backuppath = pycompat.mkdtemp(prefix='upgradebackup.', dir=srcrepo.path)
688 backuppath = pycompat.mkdtemp(prefix='upgradebackup.', dir=srcrepo.path)
689 backupvfs = vfsmod.vfs(backuppath)
689 backupvfs = vfsmod.vfs(backuppath)
690
690
691 # Make a backup of requires file first, as it is the first to be modified.
691 # Make a backup of requires file first, as it is the first to be modified.
692 util.copyfile(srcrepo.vfs.join('requires'), backupvfs.join('requires'))
692 util.copyfile(srcrepo.vfs.join('requires'), backupvfs.join('requires'))
693
693
694 # We install an arbitrary requirement that clients must not support
694 # We install an arbitrary requirement that clients must not support
695 # as a mechanism to lock out new clients during the data swap. This is
695 # as a mechanism to lock out new clients during the data swap. This is
696 # better than allowing a client to continue while the repository is in
696 # better than allowing a client to continue while the repository is in
697 # an inconsistent state.
697 # an inconsistent state.
698 ui.write(_('marking source repository as being upgraded; clients will be '
698 ui.write(_('marking source repository as being upgraded; clients will be '
699 'unable to read from repository\n'))
699 'unable to read from repository\n'))
700 scmutil.writerequires(srcrepo.vfs,
700 scmutil.writerequires(srcrepo.vfs,
701 srcrepo.requirements | {'upgradeinprogress'})
701 srcrepo.requirements | {'upgradeinprogress'})
702
702
703 ui.write(_('starting in-place swap of repository data\n'))
703 ui.write(_('starting in-place swap of repository data\n'))
704 ui.write(_('replaced files will be backed up at %s\n') %
704 ui.write(_('replaced files will be backed up at %s\n') %
705 backuppath)
705 backuppath)
706
706
707 # Now swap in the new store directory. Doing it as a rename should make
707 # Now swap in the new store directory. Doing it as a rename should make
708 # the operation nearly instantaneous and atomic (at least in well-behaved
708 # the operation nearly instantaneous and atomic (at least in well-behaved
709 # environments).
709 # environments).
710 ui.write(_('replacing store...\n'))
710 ui.write(_('replacing store...\n'))
711 tstart = util.timer()
711 tstart = util.timer()
712 util.rename(srcrepo.spath, backupvfs.join('store'))
712 util.rename(srcrepo.spath, backupvfs.join('store'))
713 util.rename(dstrepo.spath, srcrepo.spath)
713 util.rename(dstrepo.spath, srcrepo.spath)
714 elapsed = util.timer() - tstart
714 elapsed = util.timer() - tstart
715 ui.write(_('store replacement complete; repository was inconsistent for '
715 ui.write(_('store replacement complete; repository was inconsistent for '
716 '%0.1fs\n') % elapsed)
716 '%0.1fs\n') % elapsed)
717
717
718 # We first write the requirements file. Any new requirements will lock
718 # We first write the requirements file. Any new requirements will lock
719 # out legacy clients.
719 # out legacy clients.
720 ui.write(_('finalizing requirements file and making repository readable '
720 ui.write(_('finalizing requirements file and making repository readable '
721 'again\n'))
721 'again\n'))
722 scmutil.writerequires(srcrepo.vfs, requirements)
722 scmutil.writerequires(srcrepo.vfs, requirements)
723
723
724 # The lock file from the old store won't be removed because nothing has a
724 # The lock file from the old store won't be removed because nothing has a
725 # reference to its new location. So clean it up manually. Alternatively, we
725 # reference to its new location. So clean it up manually. Alternatively, we
726 # could update srcrepo.svfs and other variables to point to the new
726 # could update srcrepo.svfs and other variables to point to the new
727 # location. This is simpler.
727 # location. This is simpler.
728 backupvfs.unlink('store/lock')
728 backupvfs.unlink('store/lock')
729
729
730 return backuppath
730 return backuppath
731
731
732 def upgraderepo(ui, repo, run=False, optimize=None):
732 def upgraderepo(ui, repo, run=False, optimize=None):
733 """Upgrade a repository in place."""
733 """Upgrade a repository in place."""
734 optimize = set(optimize or [])
734 optimize = set(optimize or [])
735 repo = repo.unfiltered()
735 repo = repo.unfiltered()
736
736
737 # Ensure the repository can be upgraded.
737 # Ensure the repository can be upgraded.
738 missingreqs = requiredsourcerequirements(repo) - repo.requirements
738 missingreqs = requiredsourcerequirements(repo) - repo.requirements
739 if missingreqs:
739 if missingreqs:
740 raise error.Abort(_('cannot upgrade repository; requirement '
740 raise error.Abort(_('cannot upgrade repository; requirement '
741 'missing: %s') % _(', ').join(sorted(missingreqs)))
741 'missing: %s') % _(', ').join(sorted(missingreqs)))
742
742
743 blockedreqs = blocksourcerequirements(repo) & repo.requirements
743 blockedreqs = blocksourcerequirements(repo) & repo.requirements
744 if blockedreqs:
744 if blockedreqs:
745 raise error.Abort(_('cannot upgrade repository; unsupported source '
745 raise error.Abort(_('cannot upgrade repository; unsupported source '
746 'requirement: %s') %
746 'requirement: %s') %
747 _(', ').join(sorted(blockedreqs)))
747 _(', ').join(sorted(blockedreqs)))
748
748
749 # FUTURE there is potentially a need to control the wanted requirements via
749 # FUTURE there is potentially a need to control the wanted requirements via
750 # command arguments or via an extension hook point.
750 # command arguments or via an extension hook point.
751 newreqs = localrepo.newreporequirements(
751 newreqs = localrepo.newreporequirements(
752 repo.ui, localrepo.defaultcreateopts(repo.ui))
752 repo.ui, localrepo.defaultcreateopts(repo.ui))
753 newreqs.update(preservedrequirements(repo))
753 newreqs.update(preservedrequirements(repo))
754
754
755 noremovereqs = (repo.requirements - newreqs -
755 noremovereqs = (repo.requirements - newreqs -
756 supportremovedrequirements(repo))
756 supportremovedrequirements(repo))
757 if noremovereqs:
757 if noremovereqs:
758 raise error.Abort(_('cannot upgrade repository; requirement would be '
758 raise error.Abort(_('cannot upgrade repository; requirement would be '
759 'removed: %s') % _(', ').join(sorted(noremovereqs)))
759 'removed: %s') % _(', ').join(sorted(noremovereqs)))
760
760
761 noaddreqs = (newreqs - repo.requirements -
761 noaddreqs = (newreqs - repo.requirements -
762 allowednewrequirements(repo))
762 allowednewrequirements(repo))
763 if noaddreqs:
763 if noaddreqs:
764 raise error.Abort(_('cannot upgrade repository; do not support adding '
764 raise error.Abort(_('cannot upgrade repository; do not support adding '
765 'requirement: %s') %
765 'requirement: %s') %
766 _(', ').join(sorted(noaddreqs)))
766 _(', ').join(sorted(noaddreqs)))
767
767
768 unsupportedreqs = newreqs - supporteddestrequirements(repo)
768 unsupportedreqs = newreqs - supporteddestrequirements(repo)
769 if unsupportedreqs:
769 if unsupportedreqs:
770 raise error.Abort(_('cannot upgrade repository; do not support '
770 raise error.Abort(_('cannot upgrade repository; do not support '
771 'destination requirement: %s') %
771 'destination requirement: %s') %
772 _(', ').join(sorted(unsupportedreqs)))
772 _(', ').join(sorted(unsupportedreqs)))
773
773
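# Sketch (not part of this changeset): the four aborts above are plain
# set arithmetic; the requirement sets below are hypothetical.
current = {'revlogv1', 'store', 'fncache', 'dotencode'}
proposed = current | {'generaldelta'}
assert proposed - current == {'generaldelta'}  # additions must be allowed
assert current - proposed == set()             # nothing dropped to vet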
774 # Find and validate all improvements that can be made.
774 # Find and validate all improvements that can be made.
775 alloptimizations = findoptimizations(repo)
775 alloptimizations = findoptimizations(repo)
776
776
777 # Apply and validate arguments.
777 # Apply and validate arguments.
778 optimizations = []
778 optimizations = []
779 for o in alloptimizations:
779 for o in alloptimizations:
780 if o.name in optimize:
780 if o.name in optimize:
781 optimizations.append(o)
781 optimizations.append(o)
782 optimize.discard(o.name)
782 optimize.discard(o.name)
783
783
784 if optimize: # anything left is unknown
784 if optimize: # anything left is unknown
785 raise error.Abort(_('unknown optimization action requested: %s') %
785 raise error.Abort(_('unknown optimization action requested: %s') %
786 ', '.join(sorted(optimize)),
786 ', '.join(sorted(optimize)),
787 hint=_('run without arguments to see valid '
787 hint=_('run without arguments to see valid '
788 'optimizations'))
788 'optimizations'))
789
789
790 deficiencies = finddeficiencies(repo)
790 deficiencies = finddeficiencies(repo)
791 actions = determineactions(repo, deficiencies, repo.requirements, newreqs)
791 actions = determineactions(repo, deficiencies, repo.requirements, newreqs)
792 actions.extend(o for o in sorted(optimizations)
792 actions.extend(o for o in sorted(optimizations)
793 # determineactions could have added optimisations
793 # determineactions could have added optimisations
794 if o not in actions)
794 if o not in actions)
795
795
796 def printrequirements():
796 def printrequirements():
797 ui.write(_('requirements\n'))
797 ui.write(_('requirements\n'))
798 ui.write(_(' preserved: %s\n') %
798 ui.write(_(' preserved: %s\n') %
799 _(', ').join(sorted(newreqs & repo.requirements)))
799 _(', ').join(sorted(newreqs & repo.requirements)))
800
800
801 if repo.requirements - newreqs:
801 if repo.requirements - newreqs:
802 ui.write(_(' removed: %s\n') %
802 ui.write(_(' removed: %s\n') %
803 _(', ').join(sorted(repo.requirements - newreqs)))
803 _(', ').join(sorted(repo.requirements - newreqs)))
804
804
805 if newreqs - repo.requirements:
805 if newreqs - repo.requirements:
806 ui.write(_(' added: %s\n') %
806 ui.write(_(' added: %s\n') %
807 _(', ').join(sorted(newreqs - repo.requirements)))
807 _(', ').join(sorted(newreqs - repo.requirements)))
808
808
809 ui.write('\n')
809 ui.write('\n')
810
810
811 def printupgradeactions():
811 def printupgradeactions():
812 for a in actions:
812 for a in actions:
813 ui.write('%s\n %s\n\n' % (a.name, a.upgrademessage))
813 ui.write('%s\n %s\n\n' % (a.name, a.upgrademessage))
814
814
815 if not run:
815 if not run:
816 fromconfig = []
816 fromconfig = []
817 onlydefault = []
817 onlydefault = []
818
818
819 for d in deficiencies:
819 for d in deficiencies:
820 if d.fromconfig(repo):
820 if d.fromconfig(repo):
821 fromconfig.append(d)
821 fromconfig.append(d)
822 elif d.default:
822 elif d.default:
823 onlydefault.append(d)
823 onlydefault.append(d)
824
824
825 if fromconfig or onlydefault:
825 if fromconfig or onlydefault:
826
826
827 if fromconfig:
827 if fromconfig:
828 ui.write(_('repository lacks features recommended by '
828 ui.write(_('repository lacks features recommended by '
829 'current config options:\n\n'))
829 'current config options:\n\n'))
830 for i in fromconfig:
830 for i in fromconfig:
831 ui.write('%s\n %s\n\n' % (i.name, i.description))
831 ui.write('%s\n %s\n\n' % (i.name, i.description))
832
832
833 if onlydefault:
833 if onlydefault:
834 ui.write(_('repository lacks features used by the default '
834 ui.write(_('repository lacks features used by the default '
835 'config options:\n\n'))
835 'config options:\n\n'))
836 for i in onlydefault:
836 for i in onlydefault:
837 ui.write('%s\n %s\n\n' % (i.name, i.description))
837 ui.write('%s\n %s\n\n' % (i.name, i.description))
838
838
839 ui.write('\n')
839 ui.write('\n')
840 else:
840 else:
841 ui.write(_('(no feature deficiencies found in existing '
841 ui.write(_('(no feature deficiencies found in existing '
842 'repository)\n'))
842 'repository)\n'))
843
843
844 ui.write(_('performing an upgrade with "--run" will make the following '
844 ui.write(_('performing an upgrade with "--run" will make the following '
845 'changes:\n\n'))
845 'changes:\n\n'))
846
846
847 printrequirements()
847 printrequirements()
848 printupgradeactions()
848 printupgradeactions()
849
849
850 unusedoptimize = [i for i in alloptimizations if i not in actions]
850 unusedoptimize = [i for i in alloptimizations if i not in actions]
851
851
852 if unusedoptimize:
852 if unusedoptimize:
853 ui.write(_('additional optimizations are available by specifying '
853 ui.write(_('additional optimizations are available by specifying '
854 '"--optimize <name>":\n\n'))
854 '"--optimize <name>":\n\n'))
855 for i in unusedoptimize:
855 for i in unusedoptimize:
856 ui.write(_('%s\n %s\n\n') % (i.name, i.description))
856 ui.write(_('%s\n %s\n\n') % (i.name, i.description))
857 return
857 return
858
858
859 # Else we're in the run=true case.
859 # Else we're in the run=true case.
860 ui.write(_('upgrade will perform the following actions:\n\n'))
860 ui.write(_('upgrade will perform the following actions:\n\n'))
861 printrequirements()
861 printrequirements()
862 printupgradeactions()
862 printupgradeactions()
863
863
864 upgradeactions = [a.name for a in actions]
864 upgradeactions = [a.name for a in actions]
865
865
866 ui.write(_('beginning upgrade...\n'))
866 ui.write(_('beginning upgrade...\n'))
867 with repo.wlock(), repo.lock():
867 with repo.wlock(), repo.lock():
868 ui.write(_('repository locked and read-only\n'))
868 ui.write(_('repository locked and read-only\n'))
869 # Our strategy for upgrading the repository is to create a new,
869 # Our strategy for upgrading the repository is to create a new,
870 # temporary repository, write data to it, then do a swap of the
870 # temporary repository, write data to it, then do a swap of the
871 # data. There are less heavyweight ways to do this, but it is easier
871 # data. There are less heavyweight ways to do this, but it is easier
872 # to create a new repo object than to instantiate all the components
872 # to create a new repo object than to instantiate all the components
873 # (like the store) separately.
873 # (like the store) separately.
874 tmppath = pycompat.mkdtemp(prefix='upgrade.', dir=repo.path)
874 tmppath = pycompat.mkdtemp(prefix='upgrade.', dir=repo.path)
875 backuppath = None
875 backuppath = None
876 try:
876 try:
877 ui.write(_('creating temporary repository to stage migrated '
877 ui.write(_('creating temporary repository to stage migrated '
878 'data: %s\n') % tmppath)
878 'data: %s\n') % tmppath)
879
879
880 # clone ui without using ui.copy because repo.ui is protected
880 # clone ui without using ui.copy because repo.ui is protected
881 repoui = repo.ui.__class__(repo.ui)
881 repoui = repo.ui.__class__(repo.ui)
882 dstrepo = hg.repository(repoui, path=tmppath, create=True)
882 dstrepo = hg.repository(repoui, path=tmppath, create=True)
883
883
884 with dstrepo.wlock(), dstrepo.lock():
884 with dstrepo.wlock(), dstrepo.lock():
885 backuppath = _upgraderepo(ui, repo, dstrepo, newreqs,
885 backuppath = _upgraderepo(ui, repo, dstrepo, newreqs,
886 upgradeactions)
886 upgradeactions)
887
887
888 finally:
888 finally:
889 ui.write(_('removing temporary repository %s\n') % tmppath)
889 ui.write(_('removing temporary repository %s\n') % tmppath)
890 repo.vfs.rmtree(tmppath, forcibly=True)
890 repo.vfs.rmtree(tmppath, forcibly=True)
891
891
892 if backuppath:
892 if backuppath:
893 ui.warn(_('copy of old repository backed up at %s\n') %
893 ui.warn(_('copy of old repository backed up at %s\n') %
894 backuppath)
894 backuppath)
895 ui.warn(_('the old repository will not be deleted; remove '
895 ui.warn(_('the old repository will not be deleted; remove '
896 'it to free up disk space once the upgraded '
896 'it to free up disk space once the upgraded '
897 'repository is verified\n'))
897 'repository is verified\n'))