revlog: make sure we never use sparserevlog without general delta (issue6056)...
Boris Feld - r41525:189e06b2 stable
@@ -1,2646 +1,2649 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__  # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
      - (read)  f(self, rawtext) -> text, bool
      - (write) f(self, text) -> rawtext, bool
      - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

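# Registration sketch (illustrative comment, not part of the original file;
# helper names are invented). Mirroring the ellipsis processors above, a
# no-op processor for an already-declared flag such as REVIDX_EXTSTORED
# would be registered like this:
#
#     def _myread(self, rawtext):
#         return rawtext, False
#     def _mywrite(self, text):
#         return text, False
#     def _myraw(self, rawtext):
#         return False
#     addflagprocessor(REVIDX_EXTSTORED, (_myread, _mywrite, _myraw))
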
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

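# Worked example (illustrative comment, not part of the original file): the
# first index field packs a 48-bit data-file offset and 16 bits of flags
# into a single integer, so the two halves round-trip through the helpers
# above:
#
#     >>> offset_type(4096, 0)
#     268435456
#     >>> getoffset(268435456), gettype(268435456)
#     (4096, 0)
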
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
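
# Size check (illustrative comment, not part of the original file): the
# layout above is four 4-byte big-endian longs plus three 20-byte nodeids,
# so every v0 index record is a fixed 76 bytes:
#
#     >>> struct.Struct(">4l20s20s20s").size
#     76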

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
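
# Size check (illustrative comment, not part of the original file):
# ">Qiiiiii20s12x" is one 8-byte quad (the packed offset+flags), six 4-byte
# ints, a 20-byte nodeid, and 12 bytes of padding filling out the 32-byte
# node field, so every "ng" (v1) record is a fixed 64 bytes:
#
#     >>> struct.Struct(">Qiiiiii20s12x").size
#     64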

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
308 """
308 """
309 the underlying revision storage object
309 the underlying revision storage object
310
310
311 A revlog consists of two parts, an index and the revision data.
311 A revlog consists of two parts, an index and the revision data.
312
312
313 The index is a file with a fixed record size containing
313 The index is a file with a fixed record size containing
314 information on each revision, including its nodeid (hash), the
314 information on each revision, including its nodeid (hash), the
315 nodeids of its parents, the position and offset of its data within
315 nodeids of its parents, the position and offset of its data within
316 the data file, and the revision it's based on. Finally, each entry
316 the data file, and the revision it's based on. Finally, each entry
317 contains a linkrev entry that can serve as a pointer to external
317 contains a linkrev entry that can serve as a pointer to external
318 data.
318 data.
319
319
320 The revision data itself is a linear collection of data chunks.
320 The revision data itself is a linear collection of data chunks.
321 Each chunk represents a revision and is usually represented as a
321 Each chunk represents a revision and is usually represented as a
322 delta against the previous chunk. To bound lookup time, runs of
322 delta against the previous chunk. To bound lookup time, runs of
323 deltas are limited to about 2 times the length of the original
323 deltas are limited to about 2 times the length of the original
324 version data. This makes retrieval of a version proportional to
324 version data. This makes retrieval of a version proportional to
325 its size, or O(1) relative to the number of revisions.
325 its size, or O(1) relative to the number of revisions.
326
326
327 Both pieces of the revlog are written to in an append-only
327 Both pieces of the revlog are written to in an append-only
328 fashion, which means we never need to rewrite a file to insert or
328 fashion, which means we never need to rewrite a file to insert or
329 remove data, and can use some simple techniques to avoid the need
329 remove data, and can use some simple techniques to avoid the need
330 for locking while reading.
330 for locking while reading.
331
331
332 If checkambig, indexfile is opened with checkambig=True at
332 If checkambig, indexfile is opened with checkambig=True at
333 writing, to avoid file stat ambiguity.
333 writing, to avoid file stat ambiguity.
334
334
335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 index will be mmapped rather than read if it is larger than the
336 index will be mmapped rather than read if it is larger than the
337 configured threshold.
337 configured threshold.
338
338
339 If censorable is True, the revlog can have censored revisions.
339 If censorable is True, the revlog can have censored revisions.
340 """
340 """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
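
    # Walkthrough (illustrative comment, not part of the original file):
    # with generaldelta, each entry's base field (e[3]) may name any earlier
    # rev, so a call could return ([2, 5, 9], False) when rev 9 deltas
    # against 5, 5 against 2, and 2 stores a full snapshot (its base equals
    # its own rev, which ends the loop above). Without generaldelta the base
    # is always the previous rev, so the chain is a contiguous run of revs.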

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustext is not None:
            lazyancestors = rustext.ancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustext is not None:
            return rustext.ancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
906
909
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

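    # Sketch of a typical call (``rl`` and the revision numbers are
    # hypothetical):
    #
    #   revs = rl.findmissingrevs(common=[10], heads=[25])
    #   # -> revs that are ancestors of 25 but not of 10, sorted ascending
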
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

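    # Hypothetical example (``rl``, ``rootnode`` and ``headnode`` assumed to
    # exist; sketch only):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([rootnode], [headnode])
    #   # 'nodes' is topologically sorted and includes both endpoints when
    #   # 'headnode' really descends from 'rootnode'
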
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        return dagop.headrevs(revs, self.parentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

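    # Illustrative calls (``rl`` and ``somenode`` are assumptions):
    #
    #   rl.heads()                # every node without children
    #   rl.heads(start=somenode)  # only heads descending from somenode
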
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

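    # Properties one could sanity-check against a revlog ``rl`` (sketch;
    # ``r`` is any valid revision number):
    #
    #   assert rl.isancestorrev(r, r)        # reflexive by definition
    #   assert rl.isancestorrev(nullrev, r)  # nullrev precedes everything
    #   # and a > b short-circuits to False: revision numbers are
    #   # topologically sorted, so an ancestor can never be the larger rev
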
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

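    # Hypothetical usage (nodes ``a`` and ``b`` assumed): ties between
    # several "best" ancestors are broken by min(), so the result is stable:
    #
    #   gca = rl.ancestor(a, b)  # nullid when a and b share no ancestor
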
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quickly search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

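    # The chain above accepts several spellings (sketch, with a hypothetical
    # revlog ``rl`` and full hex nodeid ``fullhex``):
    #
    #   rl.lookup(5)            # revision number
    #   rl.lookup(b'5')         # str(revision number)
    #   rl.lookup(fullhex)      # 40-char hex nodeid
    #   rl.lookup(fullhex[:8])  # unambiguous hex prefix
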
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

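    # Sketch: a shortest prefix should round-trip back to its node (``rl``
    # and ``node`` are assumptions):
    #
    #   prefix = rl.shortest(node)
    #   assert rl._partialmatch(prefix) == node
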
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

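    # Worked example of the window arithmetic above, assuming a chunk cache
    # size of 65536 bytes (the actual size is configurable):
    #
    #   offset, length = 70000, 1000
    #   realoffset = 70000 & ~65535                              # 65536
    #   reallength = ((70000 + 1000 + 65536) & ~65535) - 65536   # 65536
    #   # -> one aligned 64k window [65536, 131072) containing the request
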
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

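    # Hypothetical behaviour sketch (``rl`` and the rev numbers assumed):
    #
    #   # generaldelta revlog: the stored base *is* the delta parent
    #   rl.deltaparent(7)   # -> e.g. 3, as recorded in the index
    #   # legacy layout: deltas chain against the previous revision
    #   rl.deltaparent(7)   # -> 6 (or nullrev when rev 7 is a snapshot)
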
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

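    # Typical calls (``rl`` and ``node`` are assumptions; sketch only):
    #
    #   text = rl.revision(node)               # flag processors applied
    #   rawtext = rl.revision(node, raw=True)  # stored bytes, e.g. for
    #                                          # changegroup generation
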
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else:  # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

1759 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1762 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1760 """Check node hash integrity.
1763 """Check node hash integrity.
1761
1764
1762 Available as a function so that subclasses can extend hash mismatch
1765 Available as a function so that subclasses can extend hash mismatch
1763 behaviors as needed.
1766 behaviors as needed.
1764 """
1767 """
1765 try:
1768 try:
1766 if p1 is None and p2 is None:
1769 if p1 is None and p2 is None:
1767 p1, p2 = self.parents(node)
1770 p1, p2 = self.parents(node)
1768 if node != self.hash(text, p1, p2):
1771 if node != self.hash(text, p1, p2):
1769 # Clear the revision cache on hash failure. The revision cache
1772 # Clear the revision cache on hash failure. The revision cache
1770 # only stores the raw revision and clearing the cache does have
1773 # only stores the raw revision and clearing the cache does have
1771 # the side-effect that we won't have a cache hit when the raw
1774 # the side-effect that we won't have a cache hit when the raw
1772 # revision data is accessed. But this case should be rare and
1775 # revision data is accessed. But this case should be rare and
1773 # it is extra work to teach the cache about the hash
1776 # it is extra work to teach the cache about the hash
1774 # verification state.
1777 # verification state.
1775 if self._revisioncache and self._revisioncache[0] == node:
1778 if self._revisioncache and self._revisioncache[0] == node:
1776 self._revisioncache = None
1779 self._revisioncache = None
1777
1780
1778 revornode = rev
1781 revornode = rev
1779 if revornode is None:
1782 if revornode is None:
1780 revornode = templatefilters.short(hex(node))
1783 revornode = templatefilters.short(hex(node))
1781 raise error.RevlogError(_("integrity check failed on %s:%s")
1784 raise error.RevlogError(_("integrity check failed on %s:%s")
1782 % (self.indexfile, pycompat.bytestr(revornode)))
1785 % (self.indexfile, pycompat.bytestr(revornode)))
1783 except error.RevlogError:
1786 except error.RevlogError:
1784 if self._censorable and storageutil.iscensoredtext(text):
1787 if self._censorable and storageutil.iscensoredtext(text):
1785 raise error.CensoredNodeError(self.indexfile, node, text)
1788 raise error.CensoredNodeError(self.indexfile, node, text)
1786 raise
1789 raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
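
    # Note: after this conversion the revlog stays non-inline for good; the
    # FLAG_INLINE_DATA bit is cleared above and later writes go to the
    # separate data (.d) file instead of being interleaved in the index.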

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
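
    # Hypothetical call sketch (variable names assumed, not part of this
    # module): appending a file revision whose parents are already stored:
    #
    #   node = fl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # With flags left at REVIDX_DEFAULT_FLAGS no transform runs inside
    # _processflags(), so rawtext == text above.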

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data
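
    # Return-contract sketch for compress(); values are illustrative and
    # assume the default zlib engine, which declines to compress data it
    # cannot shrink:
    #
    #   compress(b'')        -> ('', b'')        # empty passes through
    #   compress(big_text)   -> ('', b'x...')    # engine output, own header
    #   compress(b'\0raw')   -> ('', b'\0raw')   # NUL-led data kept as-is
    #   compress(b'tiny')    -> ('u', b'tiny')   # literal with 'u' prefix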

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)
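
    # Dispatch table implied by the branches above:
    #
    #   first byte | handling
    #   -----------+-----------------------------------------------------
    #   'x'        | zlib-compressed chunk, inflated inline
    #   '\0'       | stored verbatim, returned as-is
    #   'u'        | uncompressed literal, header byte stripped
    #   other      | resolved through util.compengines.forrevlogheader(),
    #              | with the resulting decompressor cached per header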

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)
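
    # Both checks are cheap by design: iscensored() only consults the flag
    # bits already held in the index entry, and _peek_iscensored() inspects
    # the incoming delta without materializing the patched revision.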

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)
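
    # Illustrative example (assumed shape): revs 0..2 where rev 1 and rev 2
    # are independent children of rev 0, with linkrevs [0, 2, 1].  Then
    # getstrippoint(2) yields (1, {2}): truncation must start at rev 1 (the
    # lowest rev whose linkrev is >= 2), and rev 2 (linkrev 1) is reported
    # as broken because it is removed as collateral even though its own
    # linkrev survives the strip.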

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
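
    # Reading the (dd, di) pair: ``dd`` is how far the data file's actual
    # length deviates from what the index implies, and ``di`` counts index
    # bytes beyond whole entries (or, for inline revlogs, beyond entries
    # plus their interleaved data).  verifyintegrity() below reports both
    # as problems when non-zero.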

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
            deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If unset, the destination revlog's current setting is
        used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
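
    # Hypothetical usage sketch (names assumed, not part of this module):
    # recomputing every delta while upgrading a repository's storage:
    #
    #   with repo.transaction(b'upgrade') as tr:
    #       srcrevlog.clone(tr, dstrevlog,
    #                       deltareuse=srcrevlog.DELTAREUSENEVER)
    #
    # The destination must be empty and neither revlog may be filtered, per
    # the checks at the top of clone().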

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -----------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)
2622
2625
2623 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2626 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2624 revisionscount=False, trackedsize=False,
2627 revisionscount=False, trackedsize=False,
2625 storedsize=False):
2628 storedsize=False):
2626 d = {}
2629 d = {}
2627
2630
2628 if exclusivefiles:
2631 if exclusivefiles:
2629 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2632 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2630 if not self._inline:
2633 if not self._inline:
2631 d['exclusivefiles'].append((self.opener, self.datafile))
2634 d['exclusivefiles'].append((self.opener, self.datafile))
2632
2635
2633 if sharedfiles:
2636 if sharedfiles:
2634 d['sharedfiles'] = []
2637 d['sharedfiles'] = []
2635
2638
2636 if revisionscount:
2639 if revisionscount:
2637 d['revisionscount'] = len(self)
2640 d['revisionscount'] = len(self)
2638
2641
2639 if trackedsize:
2642 if trackedsize:
2640 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2643 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2641
2644
2642 if storedsize:
2645 if storedsize:
2643 d['storedsize'] = sum(self.opener.stat(path).st_size
2646 d['storedsize'] = sum(self.opener.stat(path).st_size
2644 for path in self.files())
2647 for path in self.files())
2645
2648
2646 return d
2649 return d
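
    # Illustrative use of storageinfo (hypothetical values): only the
    # requested facts appear as keys in the returned dict, e.g.
    #   rl.storageinfo(revisionscount=True, trackedsize=True)
    #   -> {'revisionscount': 42, 'trackedsize': 12345}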
@@ -1,1015 +1,1016
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import absolute_import

import collections
import struct

# import stuff from node for others to import from revlog
from ..node import (
    nullrev,
)
from ..i18n import _

from .constants import (
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)

from ..thirdparty import (
    attr,
)

from .. import (
    error,
    mdiff,
    util,
)

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

class _testrevlog(object):
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        if rev == nullrev:
            return True
        return rev in self._snapshot

def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be emitted individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(revs,
                                revlog._srdensitythreshold,
                                revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

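# Note: slicing proceeds in two passes: density slicing first (bounding the
# payload/span ratio, handled by _slicechunktodensity or the index's native
# implementation), then size slicing (bounding the bytes read per chunk,
# handled by _slicechunktosize below).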
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected, but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing, or when
    chains are built in a way that creates large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for the start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk

def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one go.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps so that we can later pick them from largest to smallest
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # the skipped gap is no longer part of the data that will be read
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

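# Density arithmetic, for intuition (illustrative numbers): a 20-byte payload
# spread over a 100-byte span has density 0.2; removing a single 60-byte gap
# shrinks the read to 40 bytes and lifts the density to 20/40 == 0.5.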
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
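    # (Per mdiff.replacediffheader, such a full-replacement delta's header
    # packs (0, basesize, newsize) as ">lll", so matching the header means
    # the remainder of the delta is already the new fulltext.)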
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()

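# _deltainfo fields, briefly: ``distance`` is the byte distance from the base
# revision, ``deltalen`` the size of this delta, ``compresseddeltalen`` the
# cumulated size of the deltas in the chain, and ``snapshotdepth`` is None
# unless the revision would be stored as an intermediate snapshot.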
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance # ensure the conditional passes
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issues related to the span of data. In theory, it would be
    # possible to build pathological revlogs where the delta pattern would
    # lead to too many reads. However, they do not happen in practice at all.
    # So we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of deltas gets larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of deltas in the chain gets too high.
    if (revlog._maxchainlen
            and revlog._maxchainlen < deltainfo.chainlen):
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exists to prevent endless chains of intermediate deltas from
    # being created.
    if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (deltainfo.snapshotdepth
            and revlog.length(deltainfo.base) < deltainfo.deltalen):
        return False

    return True

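# Worked example for the bounds above (illustrative numbers): with
# textlen == 1000, a candidate is rejected if its delta alone exceeds 1000
# bytes, if the cumulated chain payload exceeds LIMIT_DELTA2TEXT * 1000 ==
# 2000 bytes, or (outside the sparse-revlog case) if the read span exceeds
# max(revlog._maxdeltachainspan, 4 * 1000) bytes.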
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500

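# For instance (illustrative): with a 10,000,000-byte fulltext, any base
# whose rawsize is below 10,000,000 // 500 == 20,000 bytes is skipped as a
# candidate in the sparse-revlog case.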
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _raw_candidate_groups for details about the group
    order.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revisions we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta bases that will never produce a good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # cases; since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already has too much data, skip base
            if deltas_limit < chainsize:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performance. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None

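# Illustrative driver for the generator protocol above (``pick_best`` is a
# hypothetical helper standing in for the real delta evaluation):
#
#   groups = _candidategroups(revlog, textlen, p1, p2, cachedelta)
#   candidaterevs = next(groups)
#   while candidaterevs is not None:
#       best = pick_best(candidaterevs)    # may be None
#       candidaterevs = groups.send(best)  # feed the verdict back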
def _findsnapshots(revlog, cache, start_rev):
    """find snapshots from start_rev to tip"""
    if util.safehasattr(revlog.index, 'findsnapshots'):
        revlog.index.findsnapshots(cache, start_rev)
    else:
        deltaparent = revlog.deltaparent
        issnapshot = revlog.issnapshot
        for rev in revlog.revs(start_rev):
            if issnapshot(rev):
                cache[deltaparent(rev)].append(rev)

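# Note: the resulting ``cache`` maps a delta base revision to the snapshots
# built directly on top of it, so full snapshots (whose base is nullrev) end
# up under the nullrev key -- which is what _rawgroups relies on.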
def _refinedgroups(revlog, p1, p2, cachedelta):
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling source-delta reuse is useful
    # when we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None

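# The refinement above walks in both directions: "down" follows the chosen
# snapshot's delta parents toward nullrev, while "up" tries the snapshots
# built directly on top of it, stopping as soon as a round brings no
# improvement.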
749 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
749 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
750 """Provides group of revision to be tested as delta base
750 """Provides group of revision to be tested as delta base
751
751
752 This lower level function focus on emitting delta theorically interresting
752 This lower level function focus on emitting delta theorically interresting
753 without looking it any practical details.
753 without looking it any practical details.
754
754
755 The group order aims at providing fast or small candidates first.
755 The group order aims at providing fast or small candidates first.
756 """
756 """
757 gdelta = revlog._generaldelta
757 gdelta = revlog._generaldelta
758 sparse = revlog._sparserevlog
758 # gate sparse behind general-delta because of issue6056
759 sparse = gdelta and revlog._sparserevlog
759 curr = len(revlog)
760 curr = len(revlog)
760 prev = curr - 1
761 prev = curr - 1
761 deltachain = lambda rev: revlog._deltachain(rev)[0]
762 deltachain = lambda rev: revlog._deltachain(rev)[0]
762
763
763 if gdelta:
764 if gdelta:
764 # exclude already lazy tested base if any
765 # exclude already lazy tested base if any
765 parents = [p for p in (p1, p2) if p != nullrev]
766 parents = [p for p in (p1, p2) if p != nullrev]
766
767
767 if not revlog._deltabothparents and len(parents) == 2:
768 if not revlog._deltabothparents and len(parents) == 2:
768 parents.sort()
769 parents.sort()
769 # To minimize the chance of having to build a fulltext,
770 # To minimize the chance of having to build a fulltext,
770 # pick first whichever parent is closest to us (max rev)
771 # pick first whichever parent is closest to us (max rev)
771 yield (parents[1],)
772 yield (parents[1],)
772 # then the other one (min rev) if the first did not fit
773 # then the other one (min rev) if the first did not fit
773 yield (parents[0],)
774 yield (parents[0],)
774 elif len(parents) > 0:
775 elif len(parents) > 0:
775 # Test all parents (1 or 2), and keep the best candidate
776 # Test all parents (1 or 2), and keep the best candidate
776 yield parents
777 yield parents
777
778
778 if sparse and parents:
779 if sparse and parents:
779 if snapshots is None:
780 if snapshots is None:
780 # map: base-rev: snapshot-rev
781 # map: base-rev: snapshot-rev
781 snapshots = collections.defaultdict(list)
782 snapshots = collections.defaultdict(list)
782 # See if we can use an existing snapshot in the parent chains to use as
783 # See if we can use an existing snapshot in the parent chains to use as
783 # a base for a new intermediate-snapshot
784 # a base for a new intermediate-snapshot
784 #
785 #
785 # search for snapshot in parents delta chain
786 # search for snapshot in parents delta chain
786 # map: snapshot-level: snapshot-rev
787 # map: snapshot-level: snapshot-rev
787 parents_snaps = collections.defaultdict(set)
788 parents_snaps = collections.defaultdict(set)
788 candidate_chains = [deltachain(p) for p in parents]
789 candidate_chains = [deltachain(p) for p in parents]
789 for chain in candidate_chains:
790 for chain in candidate_chains:
790 for idx, s in enumerate(chain):
791 for idx, s in enumerate(chain):
791 if not revlog.issnapshot(s):
792 if not revlog.issnapshot(s):
792 break
793 break
793 parents_snaps[idx].add(s)
794 parents_snaps[idx].add(s)
794 snapfloor = min(parents_snaps[0]) + 1
795 snapfloor = min(parents_snaps[0]) + 1
795 _findsnapshots(revlog, snapshots, snapfloor)
796 _findsnapshots(revlog, snapshots, snapfloor)
796 # search for the highest "unrelated" revision
797 # search for the highest "unrelated" revision
797 #
798 #
798 # Adding snapshots used by "unrelated" revision increase the odd we
799 # Adding snapshots used by "unrelated" revision increase the odd we
799 # reuse an independant, yet better snapshot chain.
800 # reuse an independant, yet better snapshot chain.
800 #
801 #
801 # XXX instead of building a set of revisions, we could lazily enumerate
802 # XXX instead of building a set of revisions, we could lazily enumerate
802 # over the chains. That would be more efficient, however we stick to
803 # over the chains. That would be more efficient, however we stick to
803 # simple code for now.
804 # simple code for now.
804 all_revs = set()
805 all_revs = set()
805 for chain in candidate_chains:
806 for chain in candidate_chains:
806 all_revs.update(chain)
807 all_revs.update(chain)
807 other = None
808 other = None
808 for r in revlog.revs(prev, snapfloor):
809 for r in revlog.revs(prev, snapfloor):
809 if r not in all_revs:
810 if r not in all_revs:
810 other = r
811 other = r
811 break
812 break
812 if other is not None:
813 if other is not None:
813 # To avoid unfair competition, we won't use unrelated intermediate
814 # To avoid unfair competition, we won't use unrelated intermediate
814 # snapshot that are deeper than the ones from the parent delta
815 # snapshot that are deeper than the ones from the parent delta
815 # chain.
816 # chain.
816 max_depth = max(parents_snaps.keys())
817 max_depth = max(parents_snaps.keys())
817 chain = deltachain(other)
818 chain = deltachain(other)
818 for idx, s in enumerate(chain):
819 for idx, s in enumerate(chain):
819 if s < snapfloor:
820 if s < snapfloor:
820 continue
821 continue
821 if max_depth < idx:
822 if max_depth < idx:
822 break
823 break
823 if not revlog.issnapshot(s):
824 if not revlog.issnapshot(s):
824 break
825 break
825 parents_snaps[idx].add(s)
826 parents_snaps[idx].add(s)
826 # Test them as possible intermediate snapshot base
827 # Test them as possible intermediate snapshot base
827 # We test them from highest to lowest level. High level one are more
828 # We test them from highest to lowest level. High level one are more
828 # likely to result in small delta
829 # likely to result in small delta
829 floor = None
830 floor = None
830 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
831 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
831 siblings = set()
832 siblings = set()
832 for s in snaps:
833 for s in snaps:
833 siblings.update(snapshots[s])
834 siblings.update(snapshots[s])
834 # Before considering making a new intermediate snapshot, we check
835 # Before considering making a new intermediate snapshot, we check
835 # if an existing snapshot, children of base we consider, would be
836 # if an existing snapshot, children of base we consider, would be
836 # suitable.
837 # suitable.
837 #
838 #
838 # It give a change to reuse a delta chain "unrelated" to the
839 # It give a change to reuse a delta chain "unrelated" to the
839 # current revision instead of starting our own. Without such
840 # current revision instead of starting our own. Without such
840 # re-use, topological branches would keep reopening new chains.
841 # re-use, topological branches would keep reopening new chains.
841 # Creating more and more snapshot as the repository grow.
842 # Creating more and more snapshot as the repository grow.
842
843
843 if floor is not None:
844 if floor is not None:
844 # We only do this for siblings created after the one in our
845 # We only do this for siblings created after the one in our
845 # parent's delta chain. Those created before has less chances
846 # parent's delta chain. Those created before has less chances
846 # to be valid base since our ancestors had to create a new
847 # to be valid base since our ancestors had to create a new
847 # snapshot.
848 # snapshot.
848 siblings = [r for r in siblings if floor < r]
849 siblings = [r for r in siblings if floor < r]
849 yield tuple(sorted(siblings))
850 yield tuple(sorted(siblings))
850 # then test the base from our parent's delta chain.
851 # then test the base from our parent's delta chain.
851 yield tuple(sorted(snaps))
852 yield tuple(sorted(snaps))
852 floor = min(snaps)
853 floor = min(snaps)
        # No suitable base found in the parent chain; search whether any full
        # snapshot emitted since the parent's base would be a suitable base
        # for an intermediate snapshot.
        #
        # This gives us a chance to reuse a delta chain unrelated to the
        # current revision instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # The other approaches failed; try against prev to hopefully save us
        # a fulltext.
        yield (prev,)

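# NOTE: a minimal sketch of how these candidate groups are consumed.
# finddeltainfo (below) drives the _candidategroups generator with
# next()/send(): plain next() asks for the next group of candidate bases,
# while send(base) reports which base was retained so the generator can
# refine its following suggestions. `pick_best` is a hypothetical helper,
# not part of revlog:
#
#     groups = _candidategroups(revlog, textlen, p1r, p2r, cachedelta)
#     candidates = next(groups)
#     while candidates is not None:
#         best = pick_best(revlog, candidates)
#         if best is not None:
#             candidates = groups.send(best.base)
#         else:
#             candidates = next(groups)
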
class deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

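    # NOTE: an illustration of the invariant used in _builddeltainfo above.
    # Under sparse-revlog a delta chain has the shape
    #
    #     full snapshot -> level-1 snapshot -> ... -> delta -> delta
    #
    # so when the chosen deltabase is itself a snapshot, the new revision
    # becomes a snapshot one level deeper, and its depth is simply the
    # length of deltabase's own delta chain, i.e.
    # len(revlog._deltachain(deltabase)[0]).
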
    def _fullsnapshotinfo(self, fh, revinfo):
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups.

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag-processor revisions (see "candelta" for why)
        # We do not call candelta here, since only one revision needs testing,
        # and it avoids the overhead of fetching the flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
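
For context before the test changes below: sparse-revlog storage relies on the
general-delta path above (`revlog._generaldelta` is what allows
`deltabase = base` rather than `chainbase`), which is why this changeset
insists that the `sparserevlog` requirement never appears without
`generaldelta`. A minimal sketch of that invariant, with an illustrative
function name and option keys rather than the actual patch:

    def enforce_sparse_requires_generaldelta(options):
        # sparse-revlog delta chains reference arbitrary bases, which only
        # the general-delta format can express; force it on rather than
        # silently writing an unreadable repository.
        if options.get('sparse-revlog') and not options.get('generaldelta'):
            options['generaldelta'] = True
        return options
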
@@ -1,270 +1,292
This test tries to exercise the ssh functionality with a dummy script

  $ checknewrepo()
  > {
  > name=$1
  > if [ -d "$name"/.hg/store ]; then
  > echo store created
  > fi
  > if [ -f "$name"/.hg/00changelog.i ]; then
  > echo 00changelog.i created
  > fi
  > cat "$name"/.hg/requires
  > }

creating 'local'

  $ hg init local
  $ checknewrepo local
  store created
  00changelog.i created
  dotencode
  fncache
  generaldelta
  revlogv1
  sparserevlog
  store
  testonly-simplestore (reposimplestore !)
  $ echo this > local/foo
  $ hg ci --cwd local -A -m "init"
  adding foo

test custom revlog chunk cache sizes

  $ hg --config format.chunkcachesize=0 log -R local -pv
  abort: revlog chunk cache size 0 is not greater than 0!
  [255]
  $ hg --config format.chunkcachesize=1023 log -R local -pv
  abort: revlog chunk cache size 1023 is not a power of 2!
  [255]
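
The two aborts above come from a simple sanity check on format.chunkcachesize:
the value must be strictly positive and a power of two. A sketch of an
equivalent validation (illustrative, not the literal hg source):

    def checkchunkcachesize(size):
        # mirrors the two abort messages exercised above
        if size <= 0:
            raise ValueError(
                'revlog chunk cache size %d is not greater than 0!' % size)
        if size & (size - 1) != 0:
            raise ValueError(
                'revlog chunk cache size %d is not a power of 2!' % size)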
  $ hg --config format.chunkcachesize=1024 log -R local -pv
  changeset: 0:08b9e9f63b32
  tag: tip
  user: test
  date: Thu Jan 01 00:00:00 1970 +0000
  files: foo
  description:
  init


  diff -r 000000000000 -r 08b9e9f63b32 foo
  --- /dev/null Thu Jan 01 00:00:00 1970 +0000
  +++ b/foo Thu Jan 01 00:00:00 1970 +0000
  @@ -0,0 +1,1 @@
  +this


creating repo with format.usestore=false

  $ hg --config format.usestore=false init old
  $ checknewrepo old
  generaldelta
  revlogv1
  testonly-simplestore (reposimplestore !)
  sparserevlog

creating repo with format.usefncache=false

  $ hg --config format.usefncache=false init old2
  $ checknewrepo old2
  store created
  00changelog.i created
  generaldelta
  revlogv1
  sparserevlog
  store
  testonly-simplestore (reposimplestore !)

creating repo with format.dotencode=false

  $ hg --config format.dotencode=false init old3
  $ checknewrepo old3
  store created
  00changelog.i created
  fncache
  generaldelta
  revlogv1
  sparserevlog
  store
  testonly-simplestore (reposimplestore !)

creating repo with format.generaldelta=false and format.sparse-revlog=no

  $ hg --config format.generaldelta=false --config format.usegeneraldelta=false --config format.sparse-revlog=no init old4
  $ checknewrepo old4
  store created
  00changelog.i created
  dotencode
  fncache
  revlogv1
  store
  testonly-simplestore (reposimplestore !)

test failure

  $ hg init local
  abort: repository local already exists!
  [255]

init+push to remote2

  $ hg init -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/remote2
  $ hg incoming -R remote2 local
  comparing with local
  changeset: 0:08b9e9f63b32
  tag: tip
  user: test
  date: Thu Jan 01 00:00:00 1970 +0000
  summary: init


  $ hg push -R local -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/remote2
  pushing to ssh://user@dummy/remote2
  searching for changes
  remote: adding changesets
  remote: adding manifests
  remote: adding file changes
  remote: added 1 changesets with 1 changes to 1 files

clone to remote1

  $ hg clone -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" local ssh://user@dummy/remote1
  searching for changes
  remote: adding changesets
  remote: adding manifests
  remote: adding file changes
  remote: added 1 changesets with 1 changes to 1 files

The largefiles extension doesn't crash
  $ hg clone -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" local ssh://user@dummy/remotelf --config extensions.largefiles=
  The fsmonitor extension is incompatible with the largefiles extension and has been disabled. (fsmonitor !)
  The fsmonitor extension is incompatible with the largefiles extension and has been disabled. (fsmonitor !)
  searching for changes
  remote: adding changesets
  remote: adding manifests
  remote: adding file changes
  remote: added 1 changesets with 1 changes to 1 files

init to existing repo

  $ hg init -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" ssh://user@dummy/remote1
  abort: repository remote1 already exists!
  abort: could not create remote repo!
  [255]

clone to existing repo

  $ hg clone -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" local ssh://user@dummy/remote1
  abort: repository remote1 already exists!
  abort: could not create remote repo!
  [255]

output of dummyssh

  $ cat dummylog
  Got arguments 1:user@dummy 2:hg init remote2
  Got arguments 1:user@dummy 2:hg -R remote2 serve --stdio
  Got arguments 1:user@dummy 2:hg -R remote2 serve --stdio
  Got arguments 1:user@dummy 2:hg init remote1
  Got arguments 1:user@dummy 2:hg -R remote1 serve --stdio
  Got arguments 1:user@dummy 2:hg init remotelf
  Got arguments 1:user@dummy 2:hg -R remotelf serve --stdio
  Got arguments 1:user@dummy 2:hg init remote1
  Got arguments 1:user@dummy 2:hg init remote1

comparing repositories

  $ hg tip -q -R local
  0:08b9e9f63b32
  $ hg tip -q -R remote1
  0:08b9e9f63b32
  $ hg tip -q -R remote2
  0:08b9e9f63b32

check names for repositories (clashes with URL schemes, special chars)

  $ for i in bundle file hg http https old-http ssh static-http "with space"; do
  > printf "hg init \"$i\"... "
  > hg init "$i"
  > test -d "$i" -a -d "$i/.hg" && echo "ok" || echo "failed"
  > done
  hg init "bundle"... ok
  hg init "file"... ok
  hg init "hg"... ok
  hg init "http"... ok
  hg init "https"... ok
  hg init "old-http"... ok
  hg init "ssh"... ok
  hg init "static-http"... ok
  hg init "with space"... ok
#if eol-in-paths
/* " " is not a valid name for a directory on Windows */
  $ hg init " "
  $ test -d " "
  $ test -d " /.hg"
#endif

creating 'local/sub/repo'

  $ hg init local/sub/repo
  $ checknewrepo local/sub/repo
  store created
  00changelog.i created
  dotencode
  fncache
  generaldelta
  revlogv1
  sparserevlog
  store
  testonly-simplestore (reposimplestore !)

prepare test of init of url configured from paths

  $ echo '[paths]' >> $HGRCPATH
  $ echo "somewhere = `pwd`/url from paths" >> $HGRCPATH
  $ echo "elsewhere = `pwd`/another paths url" >> $HGRCPATH

init should (for consistency with clone) expand the url

  $ hg init somewhere
  $ checknewrepo "url from paths"
  store created
  00changelog.i created
  dotencode
  fncache
  generaldelta
  revlogv1
  sparserevlog
  store
  testonly-simplestore (reposimplestore !)

verify that clone also expands urls

  $ hg clone somewhere elsewhere
  updating to branch default
  0 files updated, 0 files merged, 0 files removed, 0 files unresolved
  $ checknewrepo "another paths url"
  store created
  00changelog.i created
  dotencode
  fncache
  generaldelta
  revlogv1
  sparserevlog
  store
  testonly-simplestore (reposimplestore !)

clone bookmarks

  $ hg -R local bookmark test
  $ hg -R local bookmarks
  * test 0:08b9e9f63b32
  $ hg clone -e "\"$PYTHON\" \"$TESTDIR/dummyssh\"" local ssh://user@dummy/remote-bookmarks
  searching for changes
  remote: adding changesets
  remote: adding manifests
  remote: adding file changes
  remote: added 1 changesets with 1 changes to 1 files
  exporting bookmark test
  $ hg -R remote-bookmarks bookmarks
  test 0:08b9e9f63b32

Check format constraint
-----------------------

  $ hg init issue6056 --config format.usegeneraldelta=0 --config format.sparse-revlog=0
  $ cd issue6056
  $ echo foo > 1
  $ echo foo > 2
  $ echo foo > 3
  $ echo foo > 4
  $ echo foo > 5
  $ hg add *

Build a bogus repository (sparserevlog without general delta)

  $ hg commit -m 'initial changesets'
  $ echo 'sparserevlog' >> .hg/requires
  $ for x in `$TESTDIR/seq.py 100`; do
  > echo $x >> `expr $x % 5 + 1`
  > hg commit -m $x
  > done
  $ cd ..
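
The round-robin `expr $x % 5 + 1` spreads the 100 commits across the five
files, so every filelog accumulates a long chain of small deltas for the
sparse-revlog code paths to walk. The file-selection arithmetic, restated in
Python for clarity:

    # which file commit number x appends to, mirroring `expr $x % 5 + 1`
    def target_file(x):
        return str(x % 5 + 1)

    # commits 1..5 touch files 2, 3, 4, 5, 1, then the cycle repeats
    assert [target_file(x) for x in (1, 2, 3, 4, 5, 6)] == ['2', '3', '4', '5', '1', '2']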