##// END OF EJS Templates
flagutil: move REVIDX_KNOWN_FLAGS source of truth in flagutil (API)...
marmoute -
r42956:92ac6b16 default
parent child Browse files
Show More
@@ -1,2700 +1,2698 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullhex,
28 nullhex,
29 nullid,
29 nullid,
30 nullrev,
30 nullrev,
31 short,
31 short,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .revlogutils.constants import (
38 from .revlogutils.constants import (
39 FLAG_GENERALDELTA,
39 FLAG_GENERALDELTA,
40 FLAG_INLINE_DATA,
40 FLAG_INLINE_DATA,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_ISCENSORED,
55 REVIDX_ISCENSORED,
56 REVIDX_KNOWN_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
56 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
57 )
59 from .thirdparty import (
58 from .thirdparty import (
60 attr,
59 attr,
61 )
60 )
62 from . import (
61 from . import (
63 ancestor,
62 ancestor,
64 dagop,
63 dagop,
65 error,
64 error,
66 mdiff,
65 mdiff,
67 policy,
66 policy,
68 pycompat,
67 pycompat,
69 repository,
68 repository,
70 templatefilters,
69 templatefilters,
71 util,
70 util,
72 )
71 )
73 from .revlogutils import (
72 from .revlogutils import (
74 deltas as deltautil,
73 deltas as deltautil,
75 flagutil,
74 flagutil,
76 )
75 )
77 from .utils import (
76 from .utils import (
78 interfaceutil,
77 interfaceutil,
79 storageutil,
78 storageutil,
80 stringutil,
79 stringutil,
81 )
80 )
82
81
83 # blanked usage of all the name to prevent pyflakes constraints
82 # blanked usage of all the name to prevent pyflakes constraints
84 # We need these name available in the module for extensions.
83 # We need these name available in the module for extensions.
85 REVLOGV0
84 REVLOGV0
86 REVLOGV1
85 REVLOGV1
87 REVLOGV2
86 REVLOGV2
88 FLAG_INLINE_DATA
87 FLAG_INLINE_DATA
89 FLAG_GENERALDELTA
88 FLAG_GENERALDELTA
90 REVLOG_DEFAULT_FLAGS
89 REVLOG_DEFAULT_FLAGS
91 REVLOG_DEFAULT_FORMAT
90 REVLOG_DEFAULT_FORMAT
92 REVLOG_DEFAULT_VERSION
91 REVLOG_DEFAULT_VERSION
93 REVLOGV1_FLAGS
92 REVLOGV1_FLAGS
94 REVLOGV2_FLAGS
93 REVLOGV2_FLAGS
95 REVIDX_ISCENSORED
94 REVIDX_ISCENSORED
96 REVIDX_ELLIPSIS
95 REVIDX_ELLIPSIS
97 REVIDX_EXTSTORED
96 REVIDX_EXTSTORED
98 REVIDX_DEFAULT_FLAGS
97 REVIDX_DEFAULT_FLAGS
99 REVIDX_FLAGS_ORDER
98 REVIDX_FLAGS_ORDER
100 REVIDX_KNOWN_FLAGS
101 REVIDX_RAWTEXT_CHANGING_FLAGS
99 REVIDX_RAWTEXT_CHANGING_FLAGS
102
100
103 parsers = policy.importmod(r'parsers')
101 parsers = policy.importmod(r'parsers')
104 rustancestor = policy.importrust(r'ancestor')
102 rustancestor = policy.importrust(r'ancestor')
105 rustdagop = policy.importrust(r'dagop')
103 rustdagop = policy.importrust(r'dagop')
106
104
107 # Aliased for performance.
105 # Aliased for performance.
108 _zlibdecompress = zlib.decompress
106 _zlibdecompress = zlib.decompress
109
107
110 # max size of revlog with inline data
108 # max size of revlog with inline data
111 _maxinline = 131072
109 _maxinline = 131072
112 _chunksize = 1048576
110 _chunksize = 1048576
113
111
114 # Flag processors for REVIDX_ELLIPSIS.
112 # Flag processors for REVIDX_ELLIPSIS.
115 def ellipsisreadprocessor(rl, text):
113 def ellipsisreadprocessor(rl, text):
116 return text, False
114 return text, False
117
115
118 def ellipsiswriteprocessor(rl, text):
116 def ellipsiswriteprocessor(rl, text):
119 return text, False
117 return text, False
120
118
121 def ellipsisrawprocessor(rl, text):
119 def ellipsisrawprocessor(rl, text):
122 return False
120 return False
123
121
124 ellipsisprocessor = (
122 ellipsisprocessor = (
125 ellipsisreadprocessor,
123 ellipsisreadprocessor,
126 ellipsiswriteprocessor,
124 ellipsiswriteprocessor,
127 ellipsisrawprocessor,
125 ellipsisrawprocessor,
128 )
126 )
129
127
130 def addflagprocessor(flag, processor):
128 def addflagprocessor(flag, processor):
131 """Register a flag processor on a revision data flag.
129 """Register a flag processor on a revision data flag.
132
130
133 Invariant:
131 Invariant:
134 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
132 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
135 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
133 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
136 - Only one flag processor can be registered on a specific flag.
134 - Only one flag processor can be registered on a specific flag.
137 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
135 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
138 following signatures:
136 following signatures:
139 - (read) f(self, rawtext) -> text, bool
137 - (read) f(self, rawtext) -> text, bool
140 - (write) f(self, text) -> rawtext, bool
138 - (write) f(self, text) -> rawtext, bool
141 - (raw) f(self, rawtext) -> bool
139 - (raw) f(self, rawtext) -> bool
142 "text" is presented to the user. "rawtext" is stored in revlog data, not
140 "text" is presented to the user. "rawtext" is stored in revlog data, not
143 directly visible to the user.
141 directly visible to the user.
144 The boolean returned by these transforms is used to determine whether
142 The boolean returned by these transforms is used to determine whether
145 the returned text can be used for hash integrity checking. For example,
143 the returned text can be used for hash integrity checking. For example,
146 if "write" returns False, then "text" is used to generate hash. If
144 if "write" returns False, then "text" is used to generate hash. If
147 "write" returns True, that basically means "rawtext" returned by "write"
145 "write" returns True, that basically means "rawtext" returned by "write"
148 should be used to generate hash. Usually, "write" and "read" return
146 should be used to generate hash. Usually, "write" and "read" return
149 different booleans. And "raw" returns a same boolean as "write".
147 different booleans. And "raw" returns a same boolean as "write".
150
148
151 Note: The 'raw' transform is used for changegroup generation and in some
149 Note: The 'raw' transform is used for changegroup generation and in some
152 debug commands. In this case the transform only indicates whether the
150 debug commands. In this case the transform only indicates whether the
153 contents can be used for hash integrity checks.
151 contents can be used for hash integrity checks.
154 """
152 """
155 _insertflagprocessor(flag, processor, flagutil.flagprocessors)
153 _insertflagprocessor(flag, processor, flagutil.flagprocessors)
156
154
157 def _insertflagprocessor(flag, processor, flagprocessors):
155 def _insertflagprocessor(flag, processor, flagprocessors):
158 if not flag & REVIDX_KNOWN_FLAGS:
156 if not flag & flagutil.REVIDX_KNOWN_FLAGS:
159 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
157 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
160 raise error.ProgrammingError(msg)
158 raise error.ProgrammingError(msg)
161 if flag not in REVIDX_FLAGS_ORDER:
159 if flag not in REVIDX_FLAGS_ORDER:
162 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
160 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
163 raise error.ProgrammingError(msg)
161 raise error.ProgrammingError(msg)
164 if flag in flagprocessors:
162 if flag in flagprocessors:
165 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
163 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
166 raise error.Abort(msg)
164 raise error.Abort(msg)
167 flagprocessors[flag] = processor
165 flagprocessors[flag] = processor
168
166
169 def getoffset(q):
167 def getoffset(q):
170 return int(q >> 16)
168 return int(q >> 16)
171
169
172 def gettype(q):
170 def gettype(q):
173 return int(q & 0xFFFF)
171 return int(q & 0xFFFF)
174
172
175 def offset_type(offset, type):
173 def offset_type(offset, type):
176 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
174 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
177 raise ValueError('unknown revlog index flags')
175 raise ValueError('unknown revlog index flags')
178 return int(int(offset) << 16 | type)
176 return int(int(offset) << 16 | type)
179
177
180 @attr.s(slots=True, frozen=True)
178 @attr.s(slots=True, frozen=True)
181 class _revisioninfo(object):
179 class _revisioninfo(object):
182 """Information about a revision that allows building its fulltext
180 """Information about a revision that allows building its fulltext
183 node: expected hash of the revision
181 node: expected hash of the revision
184 p1, p2: parent revs of the revision
182 p1, p2: parent revs of the revision
185 btext: built text cache consisting of a one-element list
183 btext: built text cache consisting of a one-element list
186 cachedelta: (baserev, uncompressed_delta) or None
184 cachedelta: (baserev, uncompressed_delta) or None
187 flags: flags associated to the revision storage
185 flags: flags associated to the revision storage
188
186
189 One of btext[0] or cachedelta must be set.
187 One of btext[0] or cachedelta must be set.
190 """
188 """
191 node = attr.ib()
189 node = attr.ib()
192 p1 = attr.ib()
190 p1 = attr.ib()
193 p2 = attr.ib()
191 p2 = attr.ib()
194 btext = attr.ib()
192 btext = attr.ib()
195 textlen = attr.ib()
193 textlen = attr.ib()
196 cachedelta = attr.ib()
194 cachedelta = attr.ib()
197 flags = attr.ib()
195 flags = attr.ib()
198
196
199 @interfaceutil.implementer(repository.irevisiondelta)
197 @interfaceutil.implementer(repository.irevisiondelta)
200 @attr.s(slots=True)
198 @attr.s(slots=True)
201 class revlogrevisiondelta(object):
199 class revlogrevisiondelta(object):
202 node = attr.ib()
200 node = attr.ib()
203 p1node = attr.ib()
201 p1node = attr.ib()
204 p2node = attr.ib()
202 p2node = attr.ib()
205 basenode = attr.ib()
203 basenode = attr.ib()
206 flags = attr.ib()
204 flags = attr.ib()
207 baserevisionsize = attr.ib()
205 baserevisionsize = attr.ib()
208 revision = attr.ib()
206 revision = attr.ib()
209 delta = attr.ib()
207 delta = attr.ib()
210 linknode = attr.ib(default=None)
208 linknode = attr.ib(default=None)
211
209
212 @interfaceutil.implementer(repository.iverifyproblem)
210 @interfaceutil.implementer(repository.iverifyproblem)
213 @attr.s(frozen=True)
211 @attr.s(frozen=True)
214 class revlogproblem(object):
212 class revlogproblem(object):
215 warning = attr.ib(default=None)
213 warning = attr.ib(default=None)
216 error = attr.ib(default=None)
214 error = attr.ib(default=None)
217 node = attr.ib(default=None)
215 node = attr.ib(default=None)
218
216
219 # index v0:
217 # index v0:
220 # 4 bytes: offset
218 # 4 bytes: offset
221 # 4 bytes: compressed length
219 # 4 bytes: compressed length
222 # 4 bytes: base rev
220 # 4 bytes: base rev
223 # 4 bytes: link rev
221 # 4 bytes: link rev
224 # 20 bytes: parent 1 nodeid
222 # 20 bytes: parent 1 nodeid
225 # 20 bytes: parent 2 nodeid
223 # 20 bytes: parent 2 nodeid
226 # 20 bytes: nodeid
224 # 20 bytes: nodeid
227 indexformatv0 = struct.Struct(">4l20s20s20s")
225 indexformatv0 = struct.Struct(">4l20s20s20s")
228 indexformatv0_pack = indexformatv0.pack
226 indexformatv0_pack = indexformatv0.pack
229 indexformatv0_unpack = indexformatv0.unpack
227 indexformatv0_unpack = indexformatv0.unpack
230
228
231 class revlogoldindex(list):
229 class revlogoldindex(list):
232 def __getitem__(self, i):
230 def __getitem__(self, i):
233 if i == -1:
231 if i == -1:
234 return (0, 0, 0, -1, -1, -1, -1, nullid)
232 return (0, 0, 0, -1, -1, -1, -1, nullid)
235 return list.__getitem__(self, i)
233 return list.__getitem__(self, i)
236
234
237 class revlogoldio(object):
235 class revlogoldio(object):
238 def __init__(self):
236 def __init__(self):
239 self.size = indexformatv0.size
237 self.size = indexformatv0.size
240
238
241 def parseindex(self, data, inline):
239 def parseindex(self, data, inline):
242 s = self.size
240 s = self.size
243 index = []
241 index = []
244 nodemap = {nullid: nullrev}
242 nodemap = {nullid: nullrev}
245 n = off = 0
243 n = off = 0
246 l = len(data)
244 l = len(data)
247 while off + s <= l:
245 while off + s <= l:
248 cur = data[off:off + s]
246 cur = data[off:off + s]
249 off += s
247 off += s
250 e = indexformatv0_unpack(cur)
248 e = indexformatv0_unpack(cur)
251 # transform to revlogv1 format
249 # transform to revlogv1 format
252 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
250 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
253 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
251 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
254 index.append(e2)
252 index.append(e2)
255 nodemap[e[6]] = n
253 nodemap[e[6]] = n
256 n += 1
254 n += 1
257
255
258 return revlogoldindex(index), nodemap, None
256 return revlogoldindex(index), nodemap, None
259
257
260 def packentry(self, entry, node, version, rev):
258 def packentry(self, entry, node, version, rev):
261 if gettype(entry[0]):
259 if gettype(entry[0]):
262 raise error.RevlogError(_('index entry flags need revlog '
260 raise error.RevlogError(_('index entry flags need revlog '
263 'version 1'))
261 'version 1'))
264 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
262 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
265 node(entry[5]), node(entry[6]), entry[7])
263 node(entry[5]), node(entry[6]), entry[7])
266 return indexformatv0_pack(*e2)
264 return indexformatv0_pack(*e2)
267
265
268 # index ng:
266 # index ng:
269 # 6 bytes: offset
267 # 6 bytes: offset
270 # 2 bytes: flags
268 # 2 bytes: flags
271 # 4 bytes: compressed length
269 # 4 bytes: compressed length
272 # 4 bytes: uncompressed length
270 # 4 bytes: uncompressed length
273 # 4 bytes: base rev
271 # 4 bytes: base rev
274 # 4 bytes: link rev
272 # 4 bytes: link rev
275 # 4 bytes: parent 1 rev
273 # 4 bytes: parent 1 rev
276 # 4 bytes: parent 2 rev
274 # 4 bytes: parent 2 rev
277 # 32 bytes: nodeid
275 # 32 bytes: nodeid
278 indexformatng = struct.Struct(">Qiiiiii20s12x")
276 indexformatng = struct.Struct(">Qiiiiii20s12x")
279 indexformatng_pack = indexformatng.pack
277 indexformatng_pack = indexformatng.pack
280 versionformat = struct.Struct(">I")
278 versionformat = struct.Struct(">I")
281 versionformat_pack = versionformat.pack
279 versionformat_pack = versionformat.pack
282 versionformat_unpack = versionformat.unpack
280 versionformat_unpack = versionformat.unpack
283
281
284 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
282 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
285 # signed integer)
283 # signed integer)
286 _maxentrysize = 0x7fffffff
284 _maxentrysize = 0x7fffffff
287
285
288 class revlogio(object):
286 class revlogio(object):
289 def __init__(self):
287 def __init__(self):
290 self.size = indexformatng.size
288 self.size = indexformatng.size
291
289
292 def parseindex(self, data, inline):
290 def parseindex(self, data, inline):
293 # call the C implementation to parse the index data
291 # call the C implementation to parse the index data
294 index, cache = parsers.parse_index2(data, inline)
292 index, cache = parsers.parse_index2(data, inline)
295 return index, getattr(index, 'nodemap', None), cache
293 return index, getattr(index, 'nodemap', None), cache
296
294
297 def packentry(self, entry, node, version, rev):
295 def packentry(self, entry, node, version, rev):
298 p = indexformatng_pack(*entry)
296 p = indexformatng_pack(*entry)
299 if rev == 0:
297 if rev == 0:
300 p = versionformat_pack(version) + p[4:]
298 p = versionformat_pack(version) + p[4:]
301 return p
299 return p
302
300
303 class revlog(object):
301 class revlog(object):
304 """
302 """
305 the underlying revision storage object
303 the underlying revision storage object
306
304
307 A revlog consists of two parts, an index and the revision data.
305 A revlog consists of two parts, an index and the revision data.
308
306
309 The index is a file with a fixed record size containing
307 The index is a file with a fixed record size containing
310 information on each revision, including its nodeid (hash), the
308 information on each revision, including its nodeid (hash), the
311 nodeids of its parents, the position and offset of its data within
309 nodeids of its parents, the position and offset of its data within
312 the data file, and the revision it's based on. Finally, each entry
310 the data file, and the revision it's based on. Finally, each entry
313 contains a linkrev entry that can serve as a pointer to external
311 contains a linkrev entry that can serve as a pointer to external
314 data.
312 data.
315
313
316 The revision data itself is a linear collection of data chunks.
314 The revision data itself is a linear collection of data chunks.
317 Each chunk represents a revision and is usually represented as a
315 Each chunk represents a revision and is usually represented as a
318 delta against the previous chunk. To bound lookup time, runs of
316 delta against the previous chunk. To bound lookup time, runs of
319 deltas are limited to about 2 times the length of the original
317 deltas are limited to about 2 times the length of the original
320 version data. This makes retrieval of a version proportional to
318 version data. This makes retrieval of a version proportional to
321 its size, or O(1) relative to the number of revisions.
319 its size, or O(1) relative to the number of revisions.
322
320
323 Both pieces of the revlog are written to in an append-only
321 Both pieces of the revlog are written to in an append-only
324 fashion, which means we never need to rewrite a file to insert or
322 fashion, which means we never need to rewrite a file to insert or
325 remove data, and can use some simple techniques to avoid the need
323 remove data, and can use some simple techniques to avoid the need
326 for locking while reading.
324 for locking while reading.
327
325
328 If checkambig, indexfile is opened with checkambig=True at
326 If checkambig, indexfile is opened with checkambig=True at
329 writing, to avoid file stat ambiguity.
327 writing, to avoid file stat ambiguity.
330
328
331 If mmaplargeindex is True, and an mmapindexthreshold is set, the
329 If mmaplargeindex is True, and an mmapindexthreshold is set, the
332 index will be mmapped rather than read if it is larger than the
330 index will be mmapped rather than read if it is larger than the
333 configured threshold.
331 configured threshold.
334
332
335 If censorable is True, the revlog can have censored revisions.
333 If censorable is True, the revlog can have censored revisions.
336
334
337 If `upperboundcomp` is not None, this is the expected maximal gain from
335 If `upperboundcomp` is not None, this is the expected maximal gain from
338 compression for the data content.
336 compression for the data content.
339 """
337 """
340 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
338 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
341 mmaplargeindex=False, censorable=False,
339 mmaplargeindex=False, censorable=False,
342 upperboundcomp=None):
340 upperboundcomp=None):
343 """
341 """
344 create a revlog object
342 create a revlog object
345
343
346 opener is a function that abstracts the file opening operation
344 opener is a function that abstracts the file opening operation
347 and can be used to implement COW semantics or the like.
345 and can be used to implement COW semantics or the like.
348
346
349 """
347 """
350 self.upperboundcomp = upperboundcomp
348 self.upperboundcomp = upperboundcomp
351 self.indexfile = indexfile
349 self.indexfile = indexfile
352 self.datafile = datafile or (indexfile[:-2] + ".d")
350 self.datafile = datafile or (indexfile[:-2] + ".d")
353 self.opener = opener
351 self.opener = opener
354 # When True, indexfile is opened with checkambig=True at writing, to
352 # When True, indexfile is opened with checkambig=True at writing, to
355 # avoid file stat ambiguity.
353 # avoid file stat ambiguity.
356 self._checkambig = checkambig
354 self._checkambig = checkambig
357 self._mmaplargeindex = mmaplargeindex
355 self._mmaplargeindex = mmaplargeindex
358 self._censorable = censorable
356 self._censorable = censorable
359 # 3-tuple of (node, rev, text) for a raw revision.
357 # 3-tuple of (node, rev, text) for a raw revision.
360 self._revisioncache = None
358 self._revisioncache = None
361 # Maps rev to chain base rev.
359 # Maps rev to chain base rev.
362 self._chainbasecache = util.lrucachedict(100)
360 self._chainbasecache = util.lrucachedict(100)
363 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
364 self._chunkcache = (0, '')
362 self._chunkcache = (0, '')
365 # How much data to read and cache into the raw revlog data cache.
363 # How much data to read and cache into the raw revlog data cache.
366 self._chunkcachesize = 65536
364 self._chunkcachesize = 65536
367 self._maxchainlen = None
365 self._maxchainlen = None
368 self._deltabothparents = True
366 self._deltabothparents = True
369 self.index = []
367 self.index = []
370 # Mapping of partial identifiers to full nodes.
368 # Mapping of partial identifiers to full nodes.
371 self._pcache = {}
369 self._pcache = {}
372 # Mapping of revision integer to full node.
370 # Mapping of revision integer to full node.
373 self._nodecache = {nullid: nullrev}
371 self._nodecache = {nullid: nullrev}
374 self._nodepos = None
372 self._nodepos = None
375 self._compengine = 'zlib'
373 self._compengine = 'zlib'
376 self._compengineopts = {}
374 self._compengineopts = {}
377 self._maxdeltachainspan = -1
375 self._maxdeltachainspan = -1
378 self._withsparseread = False
376 self._withsparseread = False
379 self._sparserevlog = False
377 self._sparserevlog = False
380 self._srdensitythreshold = 0.50
378 self._srdensitythreshold = 0.50
381 self._srmingapsize = 262144
379 self._srmingapsize = 262144
382
380
383 # Make copy of flag processors so each revlog instance can support
381 # Make copy of flag processors so each revlog instance can support
384 # custom flags.
382 # custom flags.
385 self._flagprocessors = dict(flagutil.flagprocessors)
383 self._flagprocessors = dict(flagutil.flagprocessors)
386
384
387 # 2-tuple of file handles being used for active writing.
385 # 2-tuple of file handles being used for active writing.
388 self._writinghandles = None
386 self._writinghandles = None
389
387
390 self._loadindex()
388 self._loadindex()
391
389
392 def _loadindex(self):
390 def _loadindex(self):
393 mmapindexthreshold = None
391 mmapindexthreshold = None
394 opts = getattr(self.opener, 'options', {}) or {}
392 opts = getattr(self.opener, 'options', {}) or {}
395
393
396 if 'revlogv2' in opts:
394 if 'revlogv2' in opts:
397 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
398 elif 'revlogv1' in opts:
396 elif 'revlogv1' in opts:
399 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
400 if 'generaldelta' in opts:
398 if 'generaldelta' in opts:
401 newversionflags |= FLAG_GENERALDELTA
399 newversionflags |= FLAG_GENERALDELTA
402 elif getattr(self.opener, 'options', None) is not None:
400 elif getattr(self.opener, 'options', None) is not None:
403 # If options provided but no 'revlog*' found, the repository
401 # If options provided but no 'revlog*' found, the repository
404 # would have no 'requires' file in it, which means we have to
402 # would have no 'requires' file in it, which means we have to
405 # stick to the old format.
403 # stick to the old format.
406 newversionflags = REVLOGV0
404 newversionflags = REVLOGV0
407 else:
405 else:
408 newversionflags = REVLOG_DEFAULT_VERSION
406 newversionflags = REVLOG_DEFAULT_VERSION
409
407
410 if 'chunkcachesize' in opts:
408 if 'chunkcachesize' in opts:
411 self._chunkcachesize = opts['chunkcachesize']
409 self._chunkcachesize = opts['chunkcachesize']
412 if 'maxchainlen' in opts:
410 if 'maxchainlen' in opts:
413 self._maxchainlen = opts['maxchainlen']
411 self._maxchainlen = opts['maxchainlen']
414 if 'deltabothparents' in opts:
412 if 'deltabothparents' in opts:
415 self._deltabothparents = opts['deltabothparents']
413 self._deltabothparents = opts['deltabothparents']
416 self._lazydelta = bool(opts.get('lazydelta', True))
414 self._lazydelta = bool(opts.get('lazydelta', True))
417 self._lazydeltabase = False
415 self._lazydeltabase = False
418 if self._lazydelta:
416 if self._lazydelta:
419 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
417 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
420 if 'compengine' in opts:
418 if 'compengine' in opts:
421 self._compengine = opts['compengine']
419 self._compengine = opts['compengine']
422 if 'zlib.level' in opts:
420 if 'zlib.level' in opts:
423 self._compengineopts['zlib.level'] = opts['zlib.level']
421 self._compengineopts['zlib.level'] = opts['zlib.level']
424 if 'zstd.level' in opts:
422 if 'zstd.level' in opts:
425 self._compengineopts['zstd.level'] = opts['zstd.level']
423 self._compengineopts['zstd.level'] = opts['zstd.level']
426 if 'maxdeltachainspan' in opts:
424 if 'maxdeltachainspan' in opts:
427 self._maxdeltachainspan = opts['maxdeltachainspan']
425 self._maxdeltachainspan = opts['maxdeltachainspan']
428 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
426 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
429 mmapindexthreshold = opts['mmapindexthreshold']
427 mmapindexthreshold = opts['mmapindexthreshold']
430 self._sparserevlog = bool(opts.get('sparse-revlog', False))
428 self._sparserevlog = bool(opts.get('sparse-revlog', False))
431 withsparseread = bool(opts.get('with-sparse-read', False))
429 withsparseread = bool(opts.get('with-sparse-read', False))
432 # sparse-revlog forces sparse-read
430 # sparse-revlog forces sparse-read
433 self._withsparseread = self._sparserevlog or withsparseread
431 self._withsparseread = self._sparserevlog or withsparseread
434 if 'sparse-read-density-threshold' in opts:
432 if 'sparse-read-density-threshold' in opts:
435 self._srdensitythreshold = opts['sparse-read-density-threshold']
433 self._srdensitythreshold = opts['sparse-read-density-threshold']
436 if 'sparse-read-min-gap-size' in opts:
434 if 'sparse-read-min-gap-size' in opts:
437 self._srmingapsize = opts['sparse-read-min-gap-size']
435 self._srmingapsize = opts['sparse-read-min-gap-size']
438 if opts.get('enableellipsis'):
436 if opts.get('enableellipsis'):
439 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
437 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
440
438
441 # revlog v0 doesn't have flag processors
439 # revlog v0 doesn't have flag processors
442 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
440 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
443 _insertflagprocessor(flag, processor, self._flagprocessors)
441 _insertflagprocessor(flag, processor, self._flagprocessors)
444
442
445 if self._chunkcachesize <= 0:
443 if self._chunkcachesize <= 0:
446 raise error.RevlogError(_('revlog chunk cache size %r is not '
444 raise error.RevlogError(_('revlog chunk cache size %r is not '
447 'greater than 0') % self._chunkcachesize)
445 'greater than 0') % self._chunkcachesize)
448 elif self._chunkcachesize & (self._chunkcachesize - 1):
446 elif self._chunkcachesize & (self._chunkcachesize - 1):
449 raise error.RevlogError(_('revlog chunk cache size %r is not a '
447 raise error.RevlogError(_('revlog chunk cache size %r is not a '
450 'power of 2') % self._chunkcachesize)
448 'power of 2') % self._chunkcachesize)
451
449
452 indexdata = ''
450 indexdata = ''
453 self._initempty = True
451 self._initempty = True
454 try:
452 try:
455 with self._indexfp() as f:
453 with self._indexfp() as f:
456 if (mmapindexthreshold is not None and
454 if (mmapindexthreshold is not None and
457 self.opener.fstat(f).st_size >= mmapindexthreshold):
455 self.opener.fstat(f).st_size >= mmapindexthreshold):
458 # TODO: should .close() to release resources without
456 # TODO: should .close() to release resources without
459 # relying on Python GC
457 # relying on Python GC
460 indexdata = util.buffer(util.mmapread(f))
458 indexdata = util.buffer(util.mmapread(f))
461 else:
459 else:
462 indexdata = f.read()
460 indexdata = f.read()
463 if len(indexdata) > 0:
461 if len(indexdata) > 0:
464 versionflags = versionformat_unpack(indexdata[:4])[0]
462 versionflags = versionformat_unpack(indexdata[:4])[0]
465 self._initempty = False
463 self._initempty = False
466 else:
464 else:
467 versionflags = newversionflags
465 versionflags = newversionflags
468 except IOError as inst:
466 except IOError as inst:
469 if inst.errno != errno.ENOENT:
467 if inst.errno != errno.ENOENT:
470 raise
468 raise
471
469
472 versionflags = newversionflags
470 versionflags = newversionflags
473
471
474 self.version = versionflags
472 self.version = versionflags
475
473
476 flags = versionflags & ~0xFFFF
474 flags = versionflags & ~0xFFFF
477 fmt = versionflags & 0xFFFF
475 fmt = versionflags & 0xFFFF
478
476
479 if fmt == REVLOGV0:
477 if fmt == REVLOGV0:
480 if flags:
478 if flags:
481 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
479 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
482 'revlog %s') %
480 'revlog %s') %
483 (flags >> 16, fmt, self.indexfile))
481 (flags >> 16, fmt, self.indexfile))
484
482
485 self._inline = False
483 self._inline = False
486 self._generaldelta = False
484 self._generaldelta = False
487
485
488 elif fmt == REVLOGV1:
486 elif fmt == REVLOGV1:
489 if flags & ~REVLOGV1_FLAGS:
487 if flags & ~REVLOGV1_FLAGS:
490 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
488 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
491 'revlog %s') %
489 'revlog %s') %
492 (flags >> 16, fmt, self.indexfile))
490 (flags >> 16, fmt, self.indexfile))
493
491
494 self._inline = versionflags & FLAG_INLINE_DATA
492 self._inline = versionflags & FLAG_INLINE_DATA
495 self._generaldelta = versionflags & FLAG_GENERALDELTA
493 self._generaldelta = versionflags & FLAG_GENERALDELTA
496
494
497 elif fmt == REVLOGV2:
495 elif fmt == REVLOGV2:
498 if flags & ~REVLOGV2_FLAGS:
496 if flags & ~REVLOGV2_FLAGS:
499 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
497 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
500 'revlog %s') %
498 'revlog %s') %
501 (flags >> 16, fmt, self.indexfile))
499 (flags >> 16, fmt, self.indexfile))
502
500
503 self._inline = versionflags & FLAG_INLINE_DATA
501 self._inline = versionflags & FLAG_INLINE_DATA
504 # generaldelta implied by version 2 revlogs.
502 # generaldelta implied by version 2 revlogs.
505 self._generaldelta = True
503 self._generaldelta = True
506
504
507 else:
505 else:
508 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
506 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
509 (fmt, self.indexfile))
507 (fmt, self.indexfile))
510 # sparse-revlog can't be on without general-delta (issue6056)
508 # sparse-revlog can't be on without general-delta (issue6056)
511 if not self._generaldelta:
509 if not self._generaldelta:
512 self._sparserevlog = False
510 self._sparserevlog = False
513
511
514 self._storedeltachains = True
512 self._storedeltachains = True
515
513
516 self._io = revlogio()
514 self._io = revlogio()
517 if self.version == REVLOGV0:
515 if self.version == REVLOGV0:
518 self._io = revlogoldio()
516 self._io = revlogoldio()
519 try:
517 try:
520 d = self._io.parseindex(indexdata, self._inline)
518 d = self._io.parseindex(indexdata, self._inline)
521 except (ValueError, IndexError):
519 except (ValueError, IndexError):
522 raise error.RevlogError(_("index %s is corrupted") %
520 raise error.RevlogError(_("index %s is corrupted") %
523 self.indexfile)
521 self.indexfile)
524 self.index, nodemap, self._chunkcache = d
522 self.index, nodemap, self._chunkcache = d
525 if nodemap is not None:
523 if nodemap is not None:
526 self.nodemap = self._nodecache = nodemap
524 self.nodemap = self._nodecache = nodemap
527 if not self._chunkcache:
525 if not self._chunkcache:
528 self._chunkclear()
526 self._chunkclear()
529 # revnum -> (chain-length, sum-delta-length)
527 # revnum -> (chain-length, sum-delta-length)
530 self._chaininfocache = {}
528 self._chaininfocache = {}
531 # revlog header -> revlog compressor
529 # revlog header -> revlog compressor
532 self._decompressors = {}
530 self._decompressors = {}
533
531
@util.propertycache
def _compressor(self):
    """Compression engine object used to compress revlog chunks.

    Resolved lazily from the configured engine name; the result is
    memoized by util.propertycache.
    """
    return util.compengines[self._compengine].revlogcompressor(
        self._compengineopts)
538
536
539 def _indexfp(self, mode='r'):
537 def _indexfp(self, mode='r'):
540 """file object for the revlog's index file"""
538 """file object for the revlog's index file"""
541 args = {r'mode': mode}
539 args = {r'mode': mode}
542 if mode != 'r':
540 if mode != 'r':
543 args[r'checkambig'] = self._checkambig
541 args[r'checkambig'] = self._checkambig
544 if mode == 'w':
542 if mode == 'w':
545 args[r'atomictemp'] = True
543 args[r'atomictemp'] = True
546 return self.opener(self.indexfile, **args)
544 return self.opener(self.indexfile, **args)
547
545
548 def _datafp(self, mode='r'):
546 def _datafp(self, mode='r'):
549 """file object for the revlog's data file"""
547 """file object for the revlog's data file"""
550 return self.opener(self.datafile, mode=mode)
548 return self.opener(self.datafile, mode=mode)
551
549
552 @contextlib.contextmanager
550 @contextlib.contextmanager
553 def _datareadfp(self, existingfp=None):
551 def _datareadfp(self, existingfp=None):
554 """file object suitable to read data"""
552 """file object suitable to read data"""
555 # Use explicit file handle, if given.
553 # Use explicit file handle, if given.
556 if existingfp is not None:
554 if existingfp is not None:
557 yield existingfp
555 yield existingfp
558
556
559 # Use a file handle being actively used for writes, if available.
557 # Use a file handle being actively used for writes, if available.
560 # There is some danger to doing this because reads will seek the
558 # There is some danger to doing this because reads will seek the
561 # file. However, _writeentry() performs a SEEK_END before all writes,
559 # file. However, _writeentry() performs a SEEK_END before all writes,
562 # so we should be safe.
560 # so we should be safe.
563 elif self._writinghandles:
561 elif self._writinghandles:
564 if self._inline:
562 if self._inline:
565 yield self._writinghandles[0]
563 yield self._writinghandles[0]
566 else:
564 else:
567 yield self._writinghandles[1]
565 yield self._writinghandles[1]
568
566
569 # Otherwise open a new file handle.
567 # Otherwise open a new file handle.
570 else:
568 else:
571 if self._inline:
569 if self._inline:
572 func = self._indexfp
570 func = self._indexfp
573 else:
571 else:
574 func = self._datafp
572 func = self._datafp
575 with func() as fp:
573 with func() as fp:
576 yield fp
574 yield fp
577
575
def tip(self):
    """Return the node of the last revision in the index."""
    lastrev = len(self.index) - 1
    return self.node(lastrev)
580 def __contains__(self, rev):
578 def __contains__(self, rev):
581 return 0 <= rev < len(self)
579 return 0 <= rev < len(self)
582 def __len__(self):
580 def __len__(self):
583 return len(self.index)
581 return len(self.index)
def __iter__(self):
    """Iterate over all revision numbers, in ascending order."""
    total = len(self)
    return iter(pycompat.xrange(total))
def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)"""
    # Delegate range handling to the shared storage helper.
    return storageutil.iterrevs(len(self), start=start, stop=stop)
589
587
@util.propertycache
def nodemap(self):
    """Mapping of node -> rev, fully populated on first access."""
    if self.index:
        # Resolving the very first node forces the cache to be filled
        # all the way down to revision 0 (get around changelog
        # filtering).
        self.rev(self.index[0][7])
    return self._nodecache
597
595
def hasnode(self, node):
    """Report whether ``node`` is known to this revlog."""
    try:
        self.rev(node)
    except KeyError:
        # Lookup failures surface as (subclasses of) KeyError.
        return False
    return True
604
602
def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Storing a delta is unsafe whenever either revision carries a
    # content-changing flag processor (ex. LFS): such a processor can
    # alter the rawtext the delta is based on, so two clients may hold
    # the same node with different flags (i.e. different rawtexts),
    # making the delta incompatible.
    changing = REVIDX_RAWTEXT_CHANGING_FLAGS
    combined = self.flags(baserev) | self.flags(rev)
    return not (combined & changing)
616
614
def clearcaches(self):
    """Drop every in-memory cache derived from revlog content."""
    self._pcache = {}
    self._chunkcache = (0, '')
    self._revisioncache = None
    self._chainbasecache.clear()

    try:
        # With the native C index, self.index, self.nodemap and
        # self._nodecache all alias the same object, which knows how
        # to reset itself.
        self._nodecache.clearcaches()
    except AttributeError:
        # Pure-python fallback: restart from a minimal mapping.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
631
629
def rev(self, node):
    """Return the revision number holding ``node``.

    Raises error.WdirUnsupported for working-directory pseudo-nodes and
    error.LookupError for unknown nodes.
    """
    try:
        return self._nodecache[node]
    except TypeError:
        # Unhashable input is a caller bug; let it surface unchanged.
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.indexfile, _('no node'))
    except KeyError:
        # pure python cache lookup failed
        cache = self._nodecache
        index = self.index
        startrev = self._nodepos
        if startrev is None:
            startrev = len(index) - 1
        else:
            assert startrev < len(index)
        # Scan backwards, memoizing every node encountered on the way.
        for cand in pycompat.xrange(startrev, -1, -1):
            candnode = index[cand][7]
            cache[candnode] = cand
            if candnode == node:
                self._nodepos = cand - 1
                return cand
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.indexfile, _('no node'))
660
658
# Accessors for index entries.

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
# are flags.
def start(self, rev):
    """Byte offset encoded in the first index field for ``rev``."""
    return int(self.index[rev][0] >> 16)
667
665
def flags(self, rev):
    """Revision flags: the low 16 bits of the first index field."""
    return self.index[rev][0] & 0xFFFF
670
668
def length(self, rev):
    """Stored chunk length for ``rev`` (second index field)."""
    return self.index[rev][1]
673
671
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    cached = self.index[rev][2]
    if cached >= 0:
        return cached

    # A negative index entry means the size is unknown; measure the
    # reconstructed rawtext instead.
    return len(self.revision(rev, raw=True))
682
680
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    # fast path: when no flag that could change the content via a
    # "read" processor is set, processed size equals raw size. Note
    # that ELLIPSIS is known to not change the content.
    revflags = self.flags(rev)
    if revflags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
        return self.rawsize(rev)

    return len(self.revision(rev, raw=False))
692
690
def chainbase(self, rev):
    """Return the first revision of the delta chain containing ``rev``.

    Results are memoized in self._chainbasecache.
    """
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    index = self.index
    cur = rev
    base = index[cur][3]
    # Follow base pointers until a revision is its own base.
    while base != cur:
        cur = base
        base = index[cur][3]

    self._chainbasecache[rev] = base
    return base
707
705
def linkrev(self, rev):
    """Linkrev of ``rev``: the fifth index field."""
    return self.index[rev][4]
710
708
711 def parentrevs(self, rev):
709 def parentrevs(self, rev):
712 try:
710 try:
713 entry = self.index[rev]
711 entry = self.index[rev]
714 except IndexError:
712 except IndexError:
715 if rev == wdirrev:
713 if rev == wdirrev:
716 raise error.WdirUnsupported
714 raise error.WdirUnsupported
717 raise
715 raise
718
716
719 return entry[5], entry[6]
717 return entry[5], entry[6]
720
718
721 # fast parentrevs(rev) where rev isn't filtered
719 # fast parentrevs(rev) where rev isn't filtered
722 _uncheckedparentrevs = parentrevs
720 _uncheckedparentrevs = parentrevs
723
721
def node(self, rev):
    """Return the node (eighth index field) stored at ``rev``.

    Raises error.WdirUnsupported for the working-directory pseudo-rev.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    return entry[7]
731
729
# Derived from index values.

def end(self, rev):
    """Byte offset one past the end of ``rev``'s stored chunk."""
    return self.start(rev) + self.length(rev)
736
734
def parents(self, node):
    """Return the parent nodes of ``node``."""
    index = self.index
    entry = index[self.rev(node)]
    # Map parent revision numbers back to nodes inline, avoiding extra
    # method calls.
    return index[entry[5]][7], index[entry[6]][7]
741
739
def chainlen(self, rev):
    """Length of the delta chain ending at ``rev``."""
    return self._chaininfo(rev)[0]
744
742
745 def _chaininfo(self, rev):
743 def _chaininfo(self, rev):
746 chaininfocache = self._chaininfocache
744 chaininfocache = self._chaininfocache
747 if rev in chaininfocache:
745 if rev in chaininfocache:
748 return chaininfocache[rev]
746 return chaininfocache[rev]
749 index = self.index
747 index = self.index
750 generaldelta = self._generaldelta
748 generaldelta = self._generaldelta
751 iterrev = rev
749 iterrev = rev
752 e = index[iterrev]
750 e = index[iterrev]
753 clen = 0
751 clen = 0
754 compresseddeltalen = 0
752 compresseddeltalen = 0
755 while iterrev != e[3]:
753 while iterrev != e[3]:
756 clen += 1
754 clen += 1
757 compresseddeltalen += e[1]
755 compresseddeltalen += e[1]
758 if generaldelta:
756 if generaldelta:
759 iterrev = e[3]
757 iterrev = e[3]
760 else:
758 else:
761 iterrev -= 1
759 iterrev -= 1
762 if iterrev in chaininfocache:
760 if iterrev in chaininfocache:
763 t = chaininfocache[iterrev]
761 t = chaininfocache[iterrev]
764 clen += t[0]
762 clen += t[0]
765 compresseddeltalen += t[1]
763 compresseddeltalen += t[1]
766 break
764 break
767 e = index[iterrev]
765 e = index[iterrev]
768 else:
766 else:
769 # Add text length of base since decompressing that also takes
767 # Add text length of base since decompressing that also takes
770 # work. For cache hits the length is already included.
768 # work. For cache hits the length is already included.
771 compresseddeltalen += e[1]
769 compresseddeltalen += e[1]
772 r = (clen, compresseddeltalen)
770 r = (clen, compresseddeltalen)
773 chaininfocache[rev] = r
771 chaininfocache[rev] = r
774 return r
772 return r
775
773
776 def _deltachain(self, rev, stoprev=None):
774 def _deltachain(self, rev, stoprev=None):
777 """Obtain the delta chain for a revision.
775 """Obtain the delta chain for a revision.
778
776
779 ``stoprev`` specifies a revision to stop at. If not specified, we
777 ``stoprev`` specifies a revision to stop at. If not specified, we
780 stop at the base of the chain.
778 stop at the base of the chain.
781
779
782 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
780 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
783 revs in ascending order and ``stopped`` is a bool indicating whether
781 revs in ascending order and ``stopped`` is a bool indicating whether
784 ``stoprev`` was hit.
782 ``stoprev`` was hit.
785 """
783 """
786 # Try C implementation.
784 # Try C implementation.
787 try:
785 try:
788 return self.index.deltachain(rev, stoprev, self._generaldelta)
786 return self.index.deltachain(rev, stoprev, self._generaldelta)
789 except AttributeError:
787 except AttributeError:
790 pass
788 pass
791
789
792 chain = []
790 chain = []
793
791
794 # Alias to prevent attribute lookup in tight loop.
792 # Alias to prevent attribute lookup in tight loop.
795 index = self.index
793 index = self.index
796 generaldelta = self._generaldelta
794 generaldelta = self._generaldelta
797
795
798 iterrev = rev
796 iterrev = rev
799 e = index[iterrev]
797 e = index[iterrev]
800 while iterrev != e[3] and iterrev != stoprev:
798 while iterrev != e[3] and iterrev != stoprev:
801 chain.append(iterrev)
799 chain.append(iterrev)
802 if generaldelta:
800 if generaldelta:
803 iterrev = e[3]
801 iterrev = e[3]
804 else:
802 else:
805 iterrev -= 1
803 iterrev -= 1
806 e = index[iterrev]
804 e = index[iterrev]
807
805
808 if iterrev == stoprev:
806 if iterrev == stoprev:
809 stopped = True
807 stopped = True
810 else:
808 else:
811 chain.append(iterrev)
809 chain.append(iterrev)
812 stopped = False
810 stopped = False
813
811
814 chain.reverse()
812 chain.reverse()
815 return chain, stopped
813 return chain, stopped
816
814
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    revs = list(revs)
    for r in revs:
        self.node(r)
    # and we're sure ancestors aren't filtered as well

    if rustancestor is not None:
        factory = rustancestor.LazyAncestors
        arg = self.index
    elif util.safehasattr(parsers, 'rustlazyancestors'):
        factory = ancestor.rustlazyancestors
        arg = self.index
    else:
        factory = ancestor.lazyancestors
        arg = self._uncheckedparentrevs
    return factory(arg, revs, stoprev=stoprev, inclusive=inclusive)
840
838
def descendants(self, revs):
    """Generate descendant revisions of ``revs`` via the dagop helper."""
    return dagop.descendantrevs(revs, self.revs, self.parentrevs)
843
841
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        """Set-like view over a lazy iterable plus explicit additions."""
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            for r in self.addedvalues:
                yield r
            for r in self.lazyvalues:
                if r not in self.addedvalues:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        visit.extend(p for p in self.parentrevs(r) if p not in has)

    return has, [self.node(miss) for miss in sorted(missing)]
907
905
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    # Prefer the Rust implementation when available.
    if rustancestor is not None:
        return rustancestor.MissingAncestors(self.index, common)
    return ancestor.incrementalmissingancestors(self.parentrevs, common)
923
921
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    return self.incrementalmissingrevs(common=common).missingancestors(heads)
947
945
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=commonrevs)
    return [self.node(r) for r in inc.missingancestors(headrevs)]
973
971
974 def nodesbetween(self, roots=None, heads=None):
972 def nodesbetween(self, roots=None, heads=None):
975 """Return a topological path from 'roots' to 'heads'.
973 """Return a topological path from 'roots' to 'heads'.
976
974
977 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
975 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
978 topologically sorted list of all nodes N that satisfy both of
976 topologically sorted list of all nodes N that satisfy both of
979 these constraints:
977 these constraints:
980
978
981 1. N is a descendant of some node in 'roots'
979 1. N is a descendant of some node in 'roots'
982 2. N is an ancestor of some node in 'heads'
980 2. N is an ancestor of some node in 'heads'
983
981
984 Every node is considered to be both a descendant and an ancestor
982 Every node is considered to be both a descendant and an ancestor
985 of itself, so every reachable node in 'roots' and 'heads' will be
983 of itself, so every reachable node in 'roots' and 'heads' will be
986 included in 'nodes'.
984 included in 'nodes'.
987
985
988 'outroots' is the list of reachable nodes in 'roots', i.e., the
986 'outroots' is the list of reachable nodes in 'roots', i.e., the
989 subset of 'roots' that is returned in 'nodes'. Likewise,
987 subset of 'roots' that is returned in 'nodes'. Likewise,
990 'outheads' is the subset of 'heads' that is also in 'nodes'.
988 'outheads' is the subset of 'heads' that is also in 'nodes'.
991
989
992 'roots' and 'heads' are both lists of node IDs. If 'roots' is
990 'roots' and 'heads' are both lists of node IDs. If 'roots' is
993 unspecified, uses nullid as the only root. If 'heads' is
991 unspecified, uses nullid as the only root. If 'heads' is
994 unspecified, uses list of all of the revlog's heads."""
992 unspecified, uses list of all of the revlog's heads."""
995 nonodes = ([], [], [])
993 nonodes = ([], [], [])
996 if roots is not None:
994 if roots is not None:
997 roots = list(roots)
995 roots = list(roots)
998 if not roots:
996 if not roots:
999 return nonodes
997 return nonodes
1000 lowestrev = min([self.rev(n) for n in roots])
998 lowestrev = min([self.rev(n) for n in roots])
1001 else:
999 else:
1002 roots = [nullid] # Everybody's a descendant of nullid
1000 roots = [nullid] # Everybody's a descendant of nullid
1003 lowestrev = nullrev
1001 lowestrev = nullrev
1004 if (lowestrev == nullrev) and (heads is None):
1002 if (lowestrev == nullrev) and (heads is None):
1005 # We want _all_ the nodes!
1003 # We want _all_ the nodes!
1006 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1004 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1007 if heads is None:
1005 if heads is None:
1008 # All nodes are ancestors, so the latest ancestor is the last
1006 # All nodes are ancestors, so the latest ancestor is the last
1009 # node.
1007 # node.
1010 highestrev = len(self) - 1
1008 highestrev = len(self) - 1
1011 # Set ancestors to None to signal that every node is an ancestor.
1009 # Set ancestors to None to signal that every node is an ancestor.
1012 ancestors = None
1010 ancestors = None
1013 # Set heads to an empty dictionary for later discovery of heads
1011 # Set heads to an empty dictionary for later discovery of heads
1014 heads = {}
1012 heads = {}
1015 else:
1013 else:
1016 heads = list(heads)
1014 heads = list(heads)
1017 if not heads:
1015 if not heads:
1018 return nonodes
1016 return nonodes
1019 ancestors = set()
1017 ancestors = set()
1020 # Turn heads into a dictionary so we can remove 'fake' heads.
1018 # Turn heads into a dictionary so we can remove 'fake' heads.
1021 # Also, later we will be using it to filter out the heads we can't
1019 # Also, later we will be using it to filter out the heads we can't
1022 # find from roots.
1020 # find from roots.
1023 heads = dict.fromkeys(heads, False)
1021 heads = dict.fromkeys(heads, False)
1024 # Start at the top and keep marking parents until we're done.
1022 # Start at the top and keep marking parents until we're done.
1025 nodestotag = set(heads)
1023 nodestotag = set(heads)
1026 # Remember where the top was so we can use it as a limit later.
1024 # Remember where the top was so we can use it as a limit later.
1027 highestrev = max([self.rev(n) for n in nodestotag])
1025 highestrev = max([self.rev(n) for n in nodestotag])
1028 while nodestotag:
1026 while nodestotag:
1029 # grab a node to tag
1027 # grab a node to tag
1030 n = nodestotag.pop()
1028 n = nodestotag.pop()
1031 # Never tag nullid
1029 # Never tag nullid
1032 if n == nullid:
1030 if n == nullid:
1033 continue
1031 continue
1034 # A node's revision number represents its place in a
1032 # A node's revision number represents its place in a
1035 # topologically sorted list of nodes.
1033 # topologically sorted list of nodes.
1036 r = self.rev(n)
1034 r = self.rev(n)
1037 if r >= lowestrev:
1035 if r >= lowestrev:
1038 if n not in ancestors:
1036 if n not in ancestors:
1039 # If we are possibly a descendant of one of the roots
1037 # If we are possibly a descendant of one of the roots
1040 # and we haven't already been marked as an ancestor
1038 # and we haven't already been marked as an ancestor
1041 ancestors.add(n) # Mark as ancestor
1039 ancestors.add(n) # Mark as ancestor
1042 # Add non-nullid parents to list of nodes to tag.
1040 # Add non-nullid parents to list of nodes to tag.
1043 nodestotag.update([p for p in self.parents(n) if
1041 nodestotag.update([p for p in self.parents(n) if
1044 p != nullid])
1042 p != nullid])
1045 elif n in heads: # We've seen it before, is it a fake head?
1043 elif n in heads: # We've seen it before, is it a fake head?
1046 # So it is, real heads should not be the ancestors of
1044 # So it is, real heads should not be the ancestors of
1047 # any other heads.
1045 # any other heads.
1048 heads.pop(n)
1046 heads.pop(n)
1049 if not ancestors:
1047 if not ancestors:
1050 return nonodes
1048 return nonodes
1051 # Now that we have our set of ancestors, we want to remove any
1049 # Now that we have our set of ancestors, we want to remove any
1052 # roots that are not ancestors.
1050 # roots that are not ancestors.
1053
1051
1054 # If one of the roots was nullid, everything is included anyway.
1052 # If one of the roots was nullid, everything is included anyway.
1055 if lowestrev > nullrev:
1053 if lowestrev > nullrev:
1056 # But, since we weren't, let's recompute the lowest rev to not
1054 # But, since we weren't, let's recompute the lowest rev to not
1057 # include roots that aren't ancestors.
1055 # include roots that aren't ancestors.
1058
1056
1059 # Filter out roots that aren't ancestors of heads
1057 # Filter out roots that aren't ancestors of heads
1060 roots = [root for root in roots if root in ancestors]
1058 roots = [root for root in roots if root in ancestors]
1061 # Recompute the lowest revision
1059 # Recompute the lowest revision
1062 if roots:
1060 if roots:
1063 lowestrev = min([self.rev(root) for root in roots])
1061 lowestrev = min([self.rev(root) for root in roots])
1064 else:
1062 else:
1065 # No more roots? Return empty list
1063 # No more roots? Return empty list
1066 return nonodes
1064 return nonodes
1067 else:
1065 else:
1068 # We are descending from nullid, and don't need to care about
1066 # We are descending from nullid, and don't need to care about
1069 # any other roots.
1067 # any other roots.
1070 lowestrev = nullrev
1068 lowestrev = nullrev
1071 roots = [nullid]
1069 roots = [nullid]
1072 # Transform our roots list into a set.
1070 # Transform our roots list into a set.
1073 descendants = set(roots)
1071 descendants = set(roots)
1074 # Also, keep the original roots so we can filter out roots that aren't
1072 # Also, keep the original roots so we can filter out roots that aren't
1075 # 'real' roots (i.e. are descended from other roots).
1073 # 'real' roots (i.e. are descended from other roots).
1076 roots = descendants.copy()
1074 roots = descendants.copy()
1077 # Our topologically sorted list of output nodes.
1075 # Our topologically sorted list of output nodes.
1078 orderedout = []
1076 orderedout = []
1079 # Don't start at nullid since we don't want nullid in our output list,
1077 # Don't start at nullid since we don't want nullid in our output list,
1080 # and if nullid shows up in descendants, empty parents will look like
1078 # and if nullid shows up in descendants, empty parents will look like
1081 # they're descendants.
1079 # they're descendants.
1082 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1080 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1083 n = self.node(r)
1081 n = self.node(r)
1084 isdescendant = False
1082 isdescendant = False
1085 if lowestrev == nullrev: # Everybody is a descendant of nullid
1083 if lowestrev == nullrev: # Everybody is a descendant of nullid
1086 isdescendant = True
1084 isdescendant = True
1087 elif n in descendants:
1085 elif n in descendants:
1088 # n is already a descendant
1086 # n is already a descendant
1089 isdescendant = True
1087 isdescendant = True
1090 # This check only needs to be done here because all the roots
1088 # This check only needs to be done here because all the roots
1091 # will start being marked is descendants before the loop.
1089 # will start being marked is descendants before the loop.
1092 if n in roots:
1090 if n in roots:
1093 # If n was a root, check if it's a 'real' root.
1091 # If n was a root, check if it's a 'real' root.
1094 p = tuple(self.parents(n))
1092 p = tuple(self.parents(n))
1095 # If any of its parents are descendants, it's not a root.
1093 # If any of its parents are descendants, it's not a root.
1096 if (p[0] in descendants) or (p[1] in descendants):
1094 if (p[0] in descendants) or (p[1] in descendants):
1097 roots.remove(n)
1095 roots.remove(n)
1098 else:
1096 else:
1099 p = tuple(self.parents(n))
1097 p = tuple(self.parents(n))
1100 # A node is a descendant if either of its parents are
1098 # A node is a descendant if either of its parents are
1101 # descendants. (We seeded the dependents list with the roots
1099 # descendants. (We seeded the dependents list with the roots
1102 # up there, remember?)
1100 # up there, remember?)
1103 if (p[0] in descendants) or (p[1] in descendants):
1101 if (p[0] in descendants) or (p[1] in descendants):
1104 descendants.add(n)
1102 descendants.add(n)
1105 isdescendant = True
1103 isdescendant = True
1106 if isdescendant and ((ancestors is None) or (n in ancestors)):
1104 if isdescendant and ((ancestors is None) or (n in ancestors)):
1107 # Only include nodes that are both descendants and ancestors.
1105 # Only include nodes that are both descendants and ancestors.
1108 orderedout.append(n)
1106 orderedout.append(n)
1109 if (ancestors is not None) and (n in heads):
1107 if (ancestors is not None) and (n in heads):
1110 # We're trying to figure out which heads are reachable
1108 # We're trying to figure out which heads are reachable
1111 # from roots.
1109 # from roots.
1112 # Mark this head as having been reached
1110 # Mark this head as having been reached
1113 heads[n] = True
1111 heads[n] = True
1114 elif ancestors is None:
1112 elif ancestors is None:
1115 # Otherwise, we're trying to discover the heads.
1113 # Otherwise, we're trying to discover the heads.
1116 # Assume this is a head because if it isn't, the next step
1114 # Assume this is a head because if it isn't, the next step
1117 # will eventually remove it.
1115 # will eventually remove it.
1118 heads[n] = True
1116 heads[n] = True
1119 # But, obviously its parents aren't.
1117 # But, obviously its parents aren't.
1120 for p in self.parents(n):
1118 for p in self.parents(n):
1121 heads.pop(p, None)
1119 heads.pop(p, None)
1122 heads = [head for head, flag in heads.iteritems() if flag]
1120 heads = [head for head, flag in heads.iteritems() if flag]
1123 roots = list(roots)
1121 roots = list(roots)
1124 assert orderedout
1122 assert orderedout
1125 assert roots
1123 assert roots
1126 assert heads
1124 assert heads
1127 return (orderedout, roots, heads)
1125 return (orderedout, roots, heads)
1128
1126
def headrevs(self, revs=None):
    """Return the revision numbers of the heads.

    When ``revs`` is None, the heads of the whole revlog are computed,
    preferring the C index implementation when present.  Otherwise the
    heads within the ``revs`` subset are computed.
    """
    if revs is not None:
        # subset query: prefer the Rust implementation when available
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)
    try:
        # fast path provided by the C index
        return self.index.headrevs()
    except AttributeError:
        # pure-python index: fall back to the python implementation
        return self._headrevs()
1138
1136
def computephases(self, roots):
    """Compute phase maps/sets for ``roots``.

    Delegates to the C index implementation (``computephasesmapsets``).
    """
    return self.index.computephasesmapsets(roots)
1141
1139
def _headrevs(self):
    """Pure-python fallback computing every head revision number."""
    total = len(self)
    if not total:
        # empty revlog: only the null revision exists
        return [nullrev]
    # we won't iter over filtered rev so nobody is a head at start
    candidate = [0] * (total + 1)
    index = self.index
    for rev in self:
        # assume rev is a head until we see it used as a parent
        candidate[rev] = 1
        entry = index[rev]
        # entry[5] and entry[6] are the parent revisions; having a
        # child, they cannot be heads themselves
        candidate[entry[5]] = 0
        candidate[entry[6]] = 0
    return [rev for rev, flag in enumerate(candidate) if flag]
1154
1152
def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children
    """
    if start is None and stop is None:
        if not len(self):
            # empty revlog: the null node is the only head
            return [nullid]
        return [self.node(r) for r in self.headrevs()]

    # normalize the starting point to a revision number
    startrev = nullrev if start is None else self.rev(start)
    stoprevs = set(self.rev(n) for n in stop or [])

    revs = dagop.headrevssubset(self.revs, self.parentrevs,
                                startrev=startrev, stoprevs=stoprevs)

    return [self.node(rev) for rev in revs]
1179
1177
def children(self, node):
    """find the children of a given node"""
    kids = []
    target = self.rev(node)
    # children can only appear after their parent in revision order
    for rev in self.revs(start=target + 1):
        parents = [p for p in self.parentrevs(rev) if p != nullrev]
        if not parents:
            # parentless revisions are children of the null revision
            if target == nullrev:
                kids.append(self.node(rev))
        else:
            for p in parents:
                if p == target:
                    kids.append(self.node(rev))
    return kids
1193
1191
def commonancestorsheads(self, a, b):
    """calculate all the heads of the common ancestors of nodes a and b"""
    reva, revb = self.rev(a), self.rev(b)
    ancrevs = self._commonancestorsheads(reva, revb)
    # map revision numbers back to nodes for the caller
    return pycompat.maplist(self.node, ancrevs)
1199
1197
def _commonancestorsheads(self, *revs):
    """calculate all the heads of the common ancestors of revs"""
    try:
        # fast path provided by the C index
        return self.index.commonancestorsheads(*revs)
    except (AttributeError, OverflowError):
        # AttributeError: pure-python index
        # OverflowError: C implementation failed on the input
        return ancestor.commonancestorsheads(self.parentrevs, *revs)
1207
1205
def isancestor(self, a, b):
    """return True if node a is an ancestor of node b

    A revision is considered an ancestor of itself."""
    # translate nodes to revisions and defer to the rev-based variant
    return self.isancestorrev(self.rev(a), self.rev(b))
1214
1212
def isancestorrev(self, a, b):
    """return True if revision a is an ancestor of revision b

    A revision is considered an ancestor of itself.

    The implementation of this is trivial but the use of
    reachableroots is not."""
    if a == nullrev or a == b:
        # everything descends from null, and a rev is its own ancestor
        return True
    if a > b:
        # an ancestor always has a smaller revision number
        return False
    return bool(self.reachableroots(a, [b], [a], includepath=False))
1229
1227
def reachableroots(self, minroot, heads, roots, includepath=False):
    """return (heads(::<roots> and <roots>::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    try:
        # fast path provided by the C index
        return self.index.reachableroots2(minroot, heads, roots,
                                          includepath)
    except AttributeError:
        # pure-python index lacks reachableroots2
        return dagop._reachablerootspure(self.parentrevs, minroot,
                                         roots, heads, includepath)
1240
1238
def ancestor(self, a, b):
    """calculate the "best" common ancestor of nodes a and b"""
    reva, revb = self.rev(a), self.rev(b)
    try:
        ancs = self.index.ancestors(reva, revb)
    except (AttributeError, OverflowError):
        # AttributeError: pure-python index
        # OverflowError: C implementation failed on the input
        ancs = ancestor.ancestors(self.parentrevs, reva, revb)
    if not ancs:
        return nullid
    # choose a consistent winner when there's a tie
    return min(map(self.node, ancs))
1253
1251
def _match(self, id):
    """Try to resolve ``id`` as an exact identifier.

    ``id`` may be a revision number, a 20-byte binary node, the string
    form of a revision number, or a full 40-character hex node.
    Returns the binary node on success, None when nothing matched.
    """
    if isinstance(id, int):
        # rev
        return self.node(id)
    if len(id) == 20:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            node = id
            self.rev(node) # quick search the index
            return node
        except error.LookupError:
            pass # may be partial hex id
    try:
        # str(rev)
        rev = int(id)
        if "%d" % rev != id:
            # reject things like '01' or '1.0' that int() would accept
            raise ValueError
        if rev < 0:
            # negative revs count from the end
            rev = len(self) + rev
        if rev < 0 or rev >= len(self):
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass
    if len(id) == 40:
        try:
            # a full hex nodeid?
            node = bin(id)
            self.rev(node)
            return node
        except (TypeError, error.LookupError):
            pass
1287
1285
def _partialmatch(self, id):
    """Resolve a (possibly partial) hex node prefix to a binary node.

    Returns the node, None when nothing matched, or raises for
    ambiguous prefixes and the working-directory pseudo-node.
    """
    # we don't care wdirfilenodeids as they should be always full hash
    maybewdir = wdirhex.startswith(id)
    try:
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                raise error.RevlogError
            return partial
        if maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        return None
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            raise error.AmbiguousPrefixLookupError(
                id, self.indexfile, _('ambiguous identifier'))
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass

    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            half = len(id) // 2 # grab an even number of digits
            prefix = bin(id[:half * 2])
            # e[7] is the binary node of each index entry
            candidates = [e[7] for e in self.index
                          if e[7].startswith(prefix)]
            candidates = [n for n in candidates
                          if hex(n).startswith(id) and self.hasnode(n)]
            if nullhex.startswith(id):
                candidates.append(nullid)
            if len(candidates) > 0:
                if len(candidates) == 1 and not maybewdir:
                    self._pcache[id] = candidates[0]
                    return candidates[0]
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            pass
1337
1335
def lookup(self, id):
    """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
    """
    # exact identifiers first
    node = self._match(id)
    if node is not None:
        return node
    # then prefix resolution
    node = self._partialmatch(id)
    if node:
        return node

    raise error.LookupError(id, self.indexfile, _('no match found'))
1351
1349
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node."""
    def isvalid(prefix):
        # a prefix is valid when it resolves to exactly one node
        try:
            matchednode = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if matchednode is None:
            raise error.LookupError(node, self.indexfile, _('no node'))
        return True

    def maybewdir(prefix):
        # an all-'f' prefix could also denote the working directory
        return all(c == 'f' for c in pycompat.iterbytestr(prefix))

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    if not getattr(self, 'filteredrevs', None):
        try:
            # fast path provided by the C index
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except error.RevlogError:
            if node != wdirid:
                raise error.LookupError(node, self.indexfile,
                                        _('no node'))
        except AttributeError:
            # Fall through to pure code
            pass

    if node == wdirid:
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    for length in range(minlength, 41):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)
1399
1397
def cmp(self, node, text):
    """compare text with a given file revision

    returns True if text is different than what is stored.
    """
    # compare content hashes rather than the texts themselves
    p1, p2 = self.parents(node)
    return storageutil.hashrevisionsha1(text, p1, p2) != node
1407
1405
def _cachesegment(self, offset, data):
    """Add a segment to the revlog cache.

    Accepts an absolute offset and the data that is at that location.
    """
    cachedoffset, cacheddata = self._chunkcache
    # extend the existing cache entry when the new data is contiguous
    # with it and the combined size stays under the cap
    contiguous = cachedoffset + len(cacheddata) == offset
    if contiguous and len(cacheddata) + len(data) < _chunksize:
        self._chunkcache = (cachedoffset, cacheddata + data)
    else:
        self._chunkcache = (offset, data)
1419
1417
def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, length to read, and an optional existing
    file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    Returns a str or buffer of raw byte data.

    Raises if the requested number of bytes could not be read.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    cachesize = self._chunkcachesize
    # round the window down/up to cachesize-aligned boundaries
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        data = df.read(reallength)

    self._cachesegment(realoffset, data)
    if offset != realoffset or reallength != length:
        # caller asked for a sub-span of the aligned window
        startoffset = offset - realoffset
        if len(data) - startoffset < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from '
                  'offset %d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, realoffset, len(data) - startoffset))

        # zero-copy view into the cached window
        return util.buffer(data, startoffset, length)

    if len(data) < length:
        raise error.RevlogError(
            _('partial read of revlog %s; expected %d bytes from offset '
              '%d, got %d') %
            (self.indexfile if self._inline else self.datafile,
             length, offset, len(data)))

    return data
1464
1462
def _getsegment(self, offset, length, df=None):
    """Obtain a segment of raw data from the revlog.

    Accepts an absolute offset, length of bytes to obtain, and an
    optional file handle to the already-opened revlog. If the file
    handle is used, it's original seek position will not be preserved.

    Requests for data may be returned from a cache.

    Returns a str or a buffer instance of raw byte data.
    """
    cachedoffset, cacheddata = self._chunkcache
    cachedlen = len(cacheddata)

    # position of the request relative to the cached window
    cachestart = offset - cachedoffset
    cacheend = cachestart + length
    if 0 <= cachestart and cacheend <= cachedlen:
        if cachestart == 0 and cacheend == cachedlen:
            return cacheddata # avoid a copy
        return util.buffer(cacheddata, cachestart, cacheend - cachestart)

    # cache miss: go to disk (which will refresh the cache)
    return self._readsegment(offset, length, df=df)
1488
1486
def _getsegmentforrevs(self, startrev, endrev, df=None):
    """Obtain a segment of raw data corresponding to a range of revisions.

    Accepts the start and end revisions and an optional already-open
    file handle to be used for reading. If the file handle is read, its
    seek position will not be preserved.

    Requests for data may be satisfied by a cache.

    Returns a 2-tuple of (offset, data) for the requested range of
    revisions. Offset is the integer offset from the beginning of the
    revlog and data is a str or buffer of the raw byte data.

    Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
    to determine where each revision's data begins and ends.
    """
    # Inlined self.start(startrev) & self.end(endrev) for perf reasons
    # (functions are expensive).
    index = self.index
    first = index[startrev]
    # the high bits of entry[0] hold the data offset
    start = int(first[0] >> 16)
    if startrev == endrev:
        end = start + first[1]
    else:
        last = index[endrev]
        end = int(last[0] >> 16) + last[1]

    if self._inline:
        # inline revlogs interleave index entries with the data, so
        # account for the index records preceding each position
        start += (startrev + 1) * self._io.size
        end += (endrev + 1) * self._io.size
    length = end - start

    return start, self._getsegment(start, length, df=df)
1522
1520
1523 def _chunk(self, rev, df=None):
1521 def _chunk(self, rev, df=None):
1524 """Obtain a single decompressed chunk for a revision.
1522 """Obtain a single decompressed chunk for a revision.
1525
1523
1526 Accepts an integer revision and an optional already-open file handle
1524 Accepts an integer revision and an optional already-open file handle
1527 to be used for reading. If used, the seek position of the file will not
1525 to be used for reading. If used, the seek position of the file will not
1528 be preserved.
1526 be preserved.
1529
1527
1530 Returns a str holding uncompressed data for the requested revision.
1528 Returns a str holding uncompressed data for the requested revision.
1531 """
1529 """
1532 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1530 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1533
1531
def _chunks(self, revs, df=None, targetsize=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.
    """
    if not revs:
        return []

    # Bind hot attributes to locals: this loop is performance critical.
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self._io.size
    buffer = util.buffer

    chunks = []
    addchunk = chunks.append

    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = deltautil.slicechunk(self, revs,
                                            targetsize=targetsize)

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revschunk]

        decomp = self.decompress
        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                # account for the interleaved index entries
                chunkstart += (rev + 1) * iosize
            addchunk(decomp(buffer(data, chunkstart - offset, length(rev))))

    return chunks
1587
1585
1588 def _chunkclear(self):
1586 def _chunkclear(self):
1589 """Clear the raw chunk cache."""
1587 """Clear the raw chunk cache."""
1590 self._chunkcache = (0, '')
1588 self._chunkcache = (0, '')
1591
1589
def deltaparent(self, rev):
    """return deltaparent of the given revision"""
    deltabase = self.index[rev][3]
    if deltabase == rev:
        # A revision that is its own delta base stores a full snapshot.
        return nullrev
    if self._generaldelta:
        return deltabase
    # Without generaldelta, deltas are always against the previous rev.
    return rev - 1
1601
1599
def issnapshot(self, rev):
    """tells whether rev is a snapshot
    """
    if not self._sparserevlog:
        # Without sparse-revlog, only full texts (delta against null)
        # are snapshots.
        return self.deltaparent(rev) == nullrev
    elif util.safehasattr(self.index, 'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)
    if rev == nullrev:
        return True
    entry = self.index[rev]
    deltabase = entry[3]
    # Full text, or a delta rooted directly on null: a snapshot.
    if deltabase == rev or deltabase == nullrev:
        return True
    # A delta against one of the parents is a regular delta chain link,
    # not a snapshot (entry[5]/entry[6] are p1/p2 revision numbers).
    if deltabase == entry[5] or deltabase == entry[6]:
        return False
    # Otherwise this is an intermediate snapshot iff its base is one.
    return self.issnapshot(deltabase)
1624
1622
def snapshotdepth(self, rev):
    """number of snapshot in the chain before this one

    Raises ProgrammingError if ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # Bug fix: the original format string had no '% rev' applied, so
        # the error message printed a literal '%d' instead of the rev.
        raise error.ProgrammingError('revision %d not a snapshot' % rev)
    # The delta chain of a snapshot consists solely of snapshots; depth is
    # the chain length excluding the revision itself.
    return len(self._deltachain(rev)[0]) - 1
1630
1628
def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions

    The delta calculated is in binary form and is intended to be written to
    revlog data directly. So this function needs raw revision data.
    """
    # If rev2 is already stored as a delta against rev1, the stored chunk
    # can be returned as-is instead of recomputing a diff.
    if rev1 != nullrev and self.deltaparent(rev2) == rev1:
        return bytes(self._chunk(rev2))

    old = self.revision(rev1, raw=True)
    new = self.revision(rev2, raw=True)
    return mdiff.textdiff(old, new)
1642
1640
def revision(self, nodeorrev, _df=None, raw=False):
    """return an uncompressed revision of a given node or revision
    number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands.
    """
    # Thin wrapper: all real work happens in _revisiondata().
    return self._revisiondata(nodeorrev, _df, raw=raw)
1653
1651
def _revisiondata(self, nodeorrev, _df=None, raw=False):
    """Resolve ``nodeorrev`` and return the (possibly flag-processed)
    revision text.

    ``_df`` is an optional open file handle; ``raw`` skips read-side flag
    transforms and returns stored raw text.
    """
    # Accept either a revision number or a node id.
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        rev = None

    cachedrev = None
    flags = None
    rawtext = None
    if node == nullid:
        return ""
    if self._revisioncache:
        if self._revisioncache[0] == node:
            # _cache only stores rawtext
            if raw:
                return self._revisioncache[2]
            # duplicated, but good for perf
            if rev is None:
                rev = self.rev(node)
            if flags is None:
                flags = self.flags(rev)
            # no extra flags set, no flag processor runs, text = rawtext
            if flags == REVIDX_DEFAULT_FLAGS:
                return self._revisioncache[2]
            # rawtext is reusable. need to run flag processor
            rawtext = self._revisioncache[2]

        cachedrev = self._revisioncache[1]

    # look up what we need to read
    if rawtext is None:
        if rev is None:
            rev = self.rev(node)

        deltachain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            # Chain stopped at the cached revision: its rawtext is the base.
            rawtext = self._revisioncache[2]

        # drop cache to save memory
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        deltas = self._chunks(deltachain, df=_df, targetsize=targetsize)
        if rawtext is None:
            # First chunk of the chain is the full-text base.
            rawtext = bytes(deltas[0])
            deltas = deltas[1:]

        rawtext = mdiff.patches(rawtext, deltas)
        self._revisioncache = (node, rev, rawtext)

    if flags is None:
        if rev is None:
            rev = self.rev(node)
        flags = self.flags(rev)

    text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
    if validatehash:
        self.checkhash(text, node, rev=rev)

    return text
1720
1718
def rawdata(self, nodeorrev, _df=None, raw=False):
    """return an uncompressed raw data of a given node or revision number.

    _df - an existing file handle to read from. (internal-only)
    """
    # ``raw`` is accepted for signature compatibility, but this method
    # always requests the stored raw text.
    return self._revisiondata(nodeorrev, _df, raw=True)
1727
1725
def hash(self, text, p1, p2):
    """Compute a node hash.

    Available as a function so that subclasses can replace the hash
    as needed.
    """
    # Delegate to the shared storage helper.
    return storageutil.hashrevisionsha1(text, p1, p2)
1735
1733
1736 def _processflags(self, text, flags, operation, raw=False):
1734 def _processflags(self, text, flags, operation, raw=False):
1737 """Inspect revision data flags and applies transforms defined by
1735 """Inspect revision data flags and applies transforms defined by
1738 registered flag processors.
1736 registered flag processors.
1739
1737
1740 ``text`` - the revision data to process
1738 ``text`` - the revision data to process
1741 ``flags`` - the revision flags
1739 ``flags`` - the revision flags
1742 ``operation`` - the operation being performed (read or write)
1740 ``operation`` - the operation being performed (read or write)
1743 ``raw`` - an optional argument describing if the raw transform should be
1741 ``raw`` - an optional argument describing if the raw transform should be
1744 applied.
1742 applied.
1745
1743
1746 This method processes the flags in the order (or reverse order if
1744 This method processes the flags in the order (or reverse order if
1747 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1745 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1748 flag processors registered for present flags. The order of flags defined
1746 flag processors registered for present flags. The order of flags defined
1749 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1747 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1750
1748
1751 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1749 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1752 processed text and ``validatehash`` is a bool indicating whether the
1750 processed text and ``validatehash`` is a bool indicating whether the
1753 returned text should be checked for hash integrity.
1751 returned text should be checked for hash integrity.
1754
1752
1755 Note: If the ``raw`` argument is set, it has precedence over the
1753 Note: If the ``raw`` argument is set, it has precedence over the
1756 operation and will only update the value of ``validatehash``.
1754 operation and will only update the value of ``validatehash``.
1757 """
1755 """
1758 # fast path: no flag processors will run
1756 # fast path: no flag processors will run
1759 if flags == 0:
1757 if flags == 0:
1760 return text, True
1758 return text, True
1761 if not operation in ('read', 'write'):
1759 if not operation in ('read', 'write'):
1762 raise error.ProgrammingError(_("invalid '%s' operation") %
1760 raise error.ProgrammingError(_("invalid '%s' operation") %
1763 operation)
1761 operation)
1764 # Check all flags are known.
1762 # Check all flags are known.
1765 if flags & ~REVIDX_KNOWN_FLAGS:
1763 if flags & ~flagutil.REVIDX_KNOWN_FLAGS:
1766 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1764 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1767 (flags & ~REVIDX_KNOWN_FLAGS))
1765 (flags & ~flagutil.REVIDX_KNOWN_FLAGS))
1768 validatehash = True
1766 validatehash = True
1769 # Depending on the operation (read or write), the order might be
1767 # Depending on the operation (read or write), the order might be
1770 # reversed due to non-commutative transforms.
1768 # reversed due to non-commutative transforms.
1771 orderedflags = REVIDX_FLAGS_ORDER
1769 orderedflags = REVIDX_FLAGS_ORDER
1772 if operation == 'write':
1770 if operation == 'write':
1773 orderedflags = reversed(orderedflags)
1771 orderedflags = reversed(orderedflags)
1774
1772
1775 for flag in orderedflags:
1773 for flag in orderedflags:
1776 # If a flagprocessor has been registered for a known flag, apply the
1774 # If a flagprocessor has been registered for a known flag, apply the
1777 # related operation transform and update result tuple.
1775 # related operation transform and update result tuple.
1778 if flag & flags:
1776 if flag & flags:
1779 vhash = True
1777 vhash = True
1780
1778
1781 if flag not in self._flagprocessors:
1779 if flag not in self._flagprocessors:
1782 message = _("missing processor for flag '%#x'") % (flag)
1780 message = _("missing processor for flag '%#x'") % (flag)
1783 raise error.RevlogError(message)
1781 raise error.RevlogError(message)
1784
1782
1785 processor = self._flagprocessors[flag]
1783 processor = self._flagprocessors[flag]
1786 if processor is not None:
1784 if processor is not None:
1787 readtransform, writetransform, rawtransform = processor
1785 readtransform, writetransform, rawtransform = processor
1788
1786
1789 if raw:
1787 if raw:
1790 vhash = rawtransform(self, text)
1788 vhash = rawtransform(self, text)
1791 elif operation == 'read':
1789 elif operation == 'read':
1792 text, vhash = readtransform(self, text)
1790 text, vhash = readtransform(self, text)
1793 else: # write operation
1791 else: # write operation
1794 text, vhash = writetransform(self, text)
1792 text, vhash = writetransform(self, text)
1795 validatehash = validatehash and vhash
1793 validatehash = validatehash and vhash
1796
1794
1797 return text, validatehash
1795 return text, validatehash
1798
1796
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check node hash integrity.

    Available as a function so that subclasses can extend hash mismatch
    behaviors as needed.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            # Clear the revision cache on hash failure. The revision cache
            # only stores the raw revision and clearing the cache does have
            # the side-effect that we won't have a cache hit when the raw
            # revision data is accessed. But this case should be rare and
            # it is extra work to teach the cache about the hash
            # verification state.
            if self._revisioncache and self._revisioncache[0] == node:
                self._revisioncache = None

            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise error.RevlogError(_("integrity check failed on %s:%s")
                % (self.indexfile, pycompat.bytestr(revornode)))
    except error.RevlogError:
        # A failed integrity check on censored data is reported as a
        # dedicated error so callers can react specifically.
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.indexfile, node, text)
        raise
1827
1825
def _enforceinlinesize(self, tr, fp=None):
    """Check if the revlog is too big for inline and convert if so.

    This should be called after revisions are added to the revlog. If the
    revlog has grown too large to be an inline revlog, it will convert it
    to use multiple index and data files.
    """
    tiprev = len(self) - 1
    if (not self._inline or
        (self.start(tiprev) + self.length(tiprev)) < _maxinline):
        return

    trinfo = tr.find(self.indexfile)
    if trinfo is None:
        raise error.RevlogError(_("%s not found in the transaction")
                                % self.indexfile)

    trindex = trinfo[2]
    if trindex is not None:
        dataoff = self.start(trindex)
    else:
        # revlog was stripped at start of transaction, use all leftover data
        trindex = len(self) - 1
        dataoff = self.end(tiprev)

    tr.add(self.datafile, dataoff)

    if fp:
        fp.flush()
        fp.close()
        # We can't use the cached file handle after close(). So prevent
        # its usage.
        self._writinghandles = None

    # Copy every revision's raw data out into the standalone data file.
    with self._indexfp('r') as ifh, self._datafp('w') as dfh:
        for rev in self:
            dfh.write(self._getsegmentforrevs(rev, rev, df=ifh)[1])

    # Rewrite the index without the inline flag.
    with self._indexfp('w') as fp:
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        packer = self._io
        for i in self:
            entry = packer.packentry(self.index[i], self.node,
                                     self.version, i)
            fp.write(entry)

        # the temp file replace the real index when we exit the context
        # manager

    tr.replace(self.indexfile, trindex * self._io.size)
    self._chunkclear()
1879
1877
1880 def _nodeduplicatecallback(self, transaction, node):
1878 def _nodeduplicatecallback(self, transaction, node):
1881 """called when trying to add a node already stored.
1879 """called when trying to add a node already stored.
1882 """
1880 """
1883
1881
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    """
    if link == nullrev:
        raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                % self.indexfile)

    if flags:
        # Flag processors may rewrite the text; hash the original now so
        # the stored node reflects the pre-transform content.
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = self._processflags(text, flags, 'write')

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    if len(rawtext) > _maxentrysize:
        raise error.RevlogError(
            _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.indexfile, len(rawtext)))

    node = node or self.hash(rawtext, p1, p2)
    if node in self.nodemap:
        # Already stored: adding again is a no-op.
        return node

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                               flags, cachedelta=cachedelta,
                               deltacomputer=deltacomputer)
1929
1927
def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                   cachedelta=None, deltacomputer=None):
    """add a raw revision with known flags, node and parents
    useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).
    """
    # Inline revlogs have no separate data file.
    dfh = None
    if not self._inline:
        dfh = self._datafp("a+")
    ifh = self._indexfp("a+")
    try:
        return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                 flags, cachedelta, ifh, dfh,
                                 deltacomputer=deltacomputer)
    finally:
        # Always release the handles, even when _addrevision() raises.
        if dfh:
            dfh.close()
        ifh.close()
1948
1946
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a 2-tuple of (header, payload). An empty header means the
    payload already carries any needed compression header.
    """
    if not data:
        return '', data

    compressed = self._compressor.compress(data)

    if compressed:
        # The revlog compressor added the header in the returned data.
        return '', compressed

    # Compression did not help: store uncompressed. A leading NUL byte
    # cannot be mistaken for a compression header, so no 'u' marker is
    # needed in that case.
    if data[0:1] == '\0':
        return '', data
    return 'u', data
1963
1961
1964 def decompress(self, data):
1962 def decompress(self, data):
1965 """Decompress a revlog chunk.
1963 """Decompress a revlog chunk.
1966
1964
1967 The chunk is expected to begin with a header identifying the
1965 The chunk is expected to begin with a header identifying the
1968 format type so it can be routed to an appropriate decompressor.
1966 format type so it can be routed to an appropriate decompressor.
1969 """
1967 """
1970 if not data:
1968 if not data:
1971 return data
1969 return data
1972
1970
1973 # Revlogs are read much more frequently than they are written and many
1971 # Revlogs are read much more frequently than they are written and many
1974 # chunks only take microseconds to decompress, so performance is
1972 # chunks only take microseconds to decompress, so performance is
1975 # important here.
1973 # important here.
1976 #
1974 #
1977 # We can make a few assumptions about revlogs:
1975 # We can make a few assumptions about revlogs:
1978 #
1976 #
1979 # 1) the majority of chunks will be compressed (as opposed to inline
1977 # 1) the majority of chunks will be compressed (as opposed to inline
1980 # raw data).
1978 # raw data).
1981 # 2) decompressing *any* data will likely by at least 10x slower than
1979 # 2) decompressing *any* data will likely by at least 10x slower than
1982 # returning raw inline data.
1980 # returning raw inline data.
1983 # 3) we want to prioritize common and officially supported compression
1981 # 3) we want to prioritize common and officially supported compression
1984 # engines
1982 # engines
1985 #
1983 #
1986 # It follows that we want to optimize for "decompress compressed data
1984 # It follows that we want to optimize for "decompress compressed data
1987 # when encoded with common and officially supported compression engines"
1985 # when encoded with common and officially supported compression engines"
1988 # case over "raw data" and "data encoded by less common or non-official
1986 # case over "raw data" and "data encoded by less common or non-official
1989 # compression engines." That is why we have the inline lookup first
1987 # compression engines." That is why we have the inline lookup first
1990 # followed by the compengines lookup.
1988 # followed by the compengines lookup.
1991 #
1989 #
1992 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1990 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1993 # compressed chunks. And this matters for changelog and manifest reads.
1991 # compressed chunks. And this matters for changelog and manifest reads.
1994 t = data[0:1]
1992 t = data[0:1]
1995
1993
1996 if t == 'x':
1994 if t == 'x':
1997 try:
1995 try:
1998 return _zlibdecompress(data)
1996 return _zlibdecompress(data)
1999 except zlib.error as e:
1997 except zlib.error as e:
2000 raise error.RevlogError(_('revlog decompress error: %s') %
1998 raise error.RevlogError(_('revlog decompress error: %s') %
2001 stringutil.forcebytestr(e))
1999 stringutil.forcebytestr(e))
2002 # '\0' is more common than 'u' so it goes first.
2000 # '\0' is more common than 'u' so it goes first.
2003 elif t == '\0':
2001 elif t == '\0':
2004 return data
2002 return data
2005 elif t == 'u':
2003 elif t == 'u':
2006 return util.buffer(data, 1)
2004 return util.buffer(data, 1)
2007
2005
2008 try:
2006 try:
2009 compressor = self._decompressors[t]
2007 compressor = self._decompressors[t]
2010 except KeyError:
2008 except KeyError:
2011 try:
2009 try:
2012 engine = util.compengines.forrevlogheader(t)
2010 engine = util.compengines.forrevlogheader(t)
2013 compressor = engine.revlogcompressor(self._compengineopts)
2011 compressor = engine.revlogcompressor(self._compengineopts)
2014 self._decompressors[t] = compressor
2012 self._decompressors[t] = compressor
2015 except KeyError:
2013 except KeyError:
2016 raise error.RevlogError(_('unknown compression type %r') % t)
2014 raise error.RevlogError(_('unknown compression type %r') % t)
2017
2015
2018 return compressor.decompress(data)
2016 return compressor.decompress(data)
2019
2017
2020 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2018 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
2021 cachedelta, ifh, dfh, alwayscache=False,
2019 cachedelta, ifh, dfh, alwayscache=False,
2022 deltacomputer=None):
2020 deltacomputer=None):
2023 """internal function to add revisions to the log
2021 """internal function to add revisions to the log
2024
2022
2025 see addrevision for argument descriptions.
2023 see addrevision for argument descriptions.
2026
2024
2027 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2025 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2028
2026
2029 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2027 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2030 be used.
2028 be used.
2031
2029
2032 invariants:
2030 invariants:
2033 - rawtext is optional (can be None); if not set, cachedelta must be set.
2031 - rawtext is optional (can be None); if not set, cachedelta must be set.
2034 if both are set, they must correspond to each other.
2032 if both are set, they must correspond to each other.
2035 """
2033 """
2036 if node == nullid:
2034 if node == nullid:
2037 raise error.RevlogError(_("%s: attempt to add null revision") %
2035 raise error.RevlogError(_("%s: attempt to add null revision") %
2038 self.indexfile)
2036 self.indexfile)
2039 if node == wdirid or node in wdirfilenodeids:
2037 if node == wdirid or node in wdirfilenodeids:
2040 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2038 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2041 self.indexfile)
2039 self.indexfile)
2042
2040
2043 if self._inline:
2041 if self._inline:
2044 fh = ifh
2042 fh = ifh
2045 else:
2043 else:
2046 fh = dfh
2044 fh = dfh
2047
2045
2048 btext = [rawtext]
2046 btext = [rawtext]
2049
2047
2050 curr = len(self)
2048 curr = len(self)
2051 prev = curr - 1
2049 prev = curr - 1
2052 offset = self.end(prev)
2050 offset = self.end(prev)
2053 p1r, p2r = self.rev(p1), self.rev(p2)
2051 p1r, p2r = self.rev(p1), self.rev(p2)
2054
2052
2055 # full versions are inserted when the needed deltas
2053 # full versions are inserted when the needed deltas
2056 # become comparable to the uncompressed text
2054 # become comparable to the uncompressed text
2057 if rawtext is None:
2055 if rawtext is None:
2058 # need rawtext size, before changed by flag processors, which is
2056 # need rawtext size, before changed by flag processors, which is
2059 # the non-raw size. use revlog explicitly to avoid filelog's extra
2057 # the non-raw size. use revlog explicitly to avoid filelog's extra
2060 # logic that might remove metadata size.
2058 # logic that might remove metadata size.
2061 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2059 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2062 cachedelta[1])
2060 cachedelta[1])
2063 else:
2061 else:
2064 textlen = len(rawtext)
2062 textlen = len(rawtext)
2065
2063
2066 if deltacomputer is None:
2064 if deltacomputer is None:
2067 deltacomputer = deltautil.deltacomputer(self)
2065 deltacomputer = deltautil.deltacomputer(self)
2068
2066
2069 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2067 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2070
2068
2071 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2069 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2072
2070
2073 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2071 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2074 deltainfo.base, link, p1r, p2r, node)
2072 deltainfo.base, link, p1r, p2r, node)
2075 self.index.append(e)
2073 self.index.append(e)
2076 self.nodemap[node] = curr
2074 self.nodemap[node] = curr
2077
2075
2078 # Reset the pure node cache start lookup offset to account for new
2076 # Reset the pure node cache start lookup offset to account for new
2079 # revision.
2077 # revision.
2080 if self._nodepos is not None:
2078 if self._nodepos is not None:
2081 self._nodepos = curr
2079 self._nodepos = curr
2082
2080
2083 entry = self._io.packentry(e, self.node, self.version, curr)
2081 entry = self._io.packentry(e, self.node, self.version, curr)
2084 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2082 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2085 link, offset)
2083 link, offset)
2086
2084
2087 rawtext = btext[0]
2085 rawtext = btext[0]
2088
2086
2089 if alwayscache and rawtext is None:
2087 if alwayscache and rawtext is None:
2090 rawtext = deltacomputer.buildtext(revinfo, fh)
2088 rawtext = deltacomputer.buildtext(revinfo, fh)
2091
2089
2092 if type(rawtext) == bytes: # only accept immutable objects
2090 if type(rawtext) == bytes: # only accept immutable objects
2093 self._revisioncache = (node, curr, rawtext)
2091 self._revisioncache = (node, curr, rawtext)
2094 self._chainbasecache[curr] = deltainfo.chainbase
2092 self._chainbasecache[curr] = deltainfo.chainbase
2095 return node
2093 return node
2096
2094
2097 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2095 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2098 # Files opened in a+ mode have inconsistent behavior on various
2096 # Files opened in a+ mode have inconsistent behavior on various
2099 # platforms. Windows requires that a file positioning call be made
2097 # platforms. Windows requires that a file positioning call be made
2100 # when the file handle transitions between reads and writes. See
2098 # when the file handle transitions between reads and writes. See
2101 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2099 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2102 # platforms, Python or the platform itself can be buggy. Some versions
2100 # platforms, Python or the platform itself can be buggy. Some versions
2103 # of Solaris have been observed to not append at the end of the file
2101 # of Solaris have been observed to not append at the end of the file
2104 # if the file was seeked to before the end. See issue4943 for more.
2102 # if the file was seeked to before the end. See issue4943 for more.
2105 #
2103 #
2106 # We work around this issue by inserting a seek() before writing.
2104 # We work around this issue by inserting a seek() before writing.
2107 # Note: This is likely not necessary on Python 3. However, because
2105 # Note: This is likely not necessary on Python 3. However, because
2108 # the file handle is reused for reads and may be seeked there, we need
2106 # the file handle is reused for reads and may be seeked there, we need
2109 # to be careful before changing this.
2107 # to be careful before changing this.
2110 ifh.seek(0, os.SEEK_END)
2108 ifh.seek(0, os.SEEK_END)
2111 if dfh:
2109 if dfh:
2112 dfh.seek(0, os.SEEK_END)
2110 dfh.seek(0, os.SEEK_END)
2113
2111
2114 curr = len(self) - 1
2112 curr = len(self) - 1
2115 if not self._inline:
2113 if not self._inline:
2116 transaction.add(self.datafile, offset)
2114 transaction.add(self.datafile, offset)
2117 transaction.add(self.indexfile, curr * len(entry))
2115 transaction.add(self.indexfile, curr * len(entry))
2118 if data[0]:
2116 if data[0]:
2119 dfh.write(data[0])
2117 dfh.write(data[0])
2120 dfh.write(data[1])
2118 dfh.write(data[1])
2121 ifh.write(entry)
2119 ifh.write(entry)
2122 else:
2120 else:
2123 offset += curr * self._io.size
2121 offset += curr * self._io.size
2124 transaction.add(self.indexfile, offset, curr)
2122 transaction.add(self.indexfile, offset, curr)
2125 ifh.write(entry)
2123 ifh.write(entry)
2126 ifh.write(data[0])
2124 ifh.write(data[0])
2127 ifh.write(data[1])
2125 ifh.write(data[1])
2128 self._enforceinlinesize(transaction, ifh)
2126 self._enforceinlinesize(transaction, ifh)
2129
2127
2130 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2128 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2131 """
2129 """
2132 add a delta group
2130 add a delta group
2133
2131
2134 given a set of deltas, add them to the revision log. the
2132 given a set of deltas, add them to the revision log. the
2135 first delta is against its parent, which should be in our
2133 first delta is against its parent, which should be in our
2136 log, the rest are against the previous delta.
2134 log, the rest are against the previous delta.
2137
2135
2138 If ``addrevisioncb`` is defined, it will be called with arguments of
2136 If ``addrevisioncb`` is defined, it will be called with arguments of
2139 this revlog and the node that was added.
2137 this revlog and the node that was added.
2140 """
2138 """
2141
2139
2142 if self._writinghandles:
2140 if self._writinghandles:
2143 raise error.ProgrammingError('cannot nest addgroup() calls')
2141 raise error.ProgrammingError('cannot nest addgroup() calls')
2144
2142
2145 nodes = []
2143 nodes = []
2146
2144
2147 r = len(self)
2145 r = len(self)
2148 end = 0
2146 end = 0
2149 if r:
2147 if r:
2150 end = self.end(r - 1)
2148 end = self.end(r - 1)
2151 ifh = self._indexfp("a+")
2149 ifh = self._indexfp("a+")
2152 isize = r * self._io.size
2150 isize = r * self._io.size
2153 if self._inline:
2151 if self._inline:
2154 transaction.add(self.indexfile, end + isize, r)
2152 transaction.add(self.indexfile, end + isize, r)
2155 dfh = None
2153 dfh = None
2156 else:
2154 else:
2157 transaction.add(self.indexfile, isize, r)
2155 transaction.add(self.indexfile, isize, r)
2158 transaction.add(self.datafile, end)
2156 transaction.add(self.datafile, end)
2159 dfh = self._datafp("a+")
2157 dfh = self._datafp("a+")
2160 def flush():
2158 def flush():
2161 if dfh:
2159 if dfh:
2162 dfh.flush()
2160 dfh.flush()
2163 ifh.flush()
2161 ifh.flush()
2164
2162
2165 self._writinghandles = (ifh, dfh)
2163 self._writinghandles = (ifh, dfh)
2166
2164
2167 try:
2165 try:
2168 deltacomputer = deltautil.deltacomputer(self)
2166 deltacomputer = deltautil.deltacomputer(self)
2169 # loop through our set of deltas
2167 # loop through our set of deltas
2170 for data in deltas:
2168 for data in deltas:
2171 node, p1, p2, linknode, deltabase, delta, flags = data
2169 node, p1, p2, linknode, deltabase, delta, flags = data
2172 link = linkmapper(linknode)
2170 link = linkmapper(linknode)
2173 flags = flags or REVIDX_DEFAULT_FLAGS
2171 flags = flags or REVIDX_DEFAULT_FLAGS
2174
2172
2175 nodes.append(node)
2173 nodes.append(node)
2176
2174
2177 if node in self.nodemap:
2175 if node in self.nodemap:
2178 self._nodeduplicatecallback(transaction, node)
2176 self._nodeduplicatecallback(transaction, node)
2179 # this can happen if two branches make the same change
2177 # this can happen if two branches make the same change
2180 continue
2178 continue
2181
2179
2182 for p in (p1, p2):
2180 for p in (p1, p2):
2183 if p not in self.nodemap:
2181 if p not in self.nodemap:
2184 raise error.LookupError(p, self.indexfile,
2182 raise error.LookupError(p, self.indexfile,
2185 _('unknown parent'))
2183 _('unknown parent'))
2186
2184
2187 if deltabase not in self.nodemap:
2185 if deltabase not in self.nodemap:
2188 raise error.LookupError(deltabase, self.indexfile,
2186 raise error.LookupError(deltabase, self.indexfile,
2189 _('unknown delta base'))
2187 _('unknown delta base'))
2190
2188
2191 baserev = self.rev(deltabase)
2189 baserev = self.rev(deltabase)
2192
2190
2193 if baserev != nullrev and self.iscensored(baserev):
2191 if baserev != nullrev and self.iscensored(baserev):
2194 # if base is censored, delta must be full replacement in a
2192 # if base is censored, delta must be full replacement in a
2195 # single patch operation
2193 # single patch operation
2196 hlen = struct.calcsize(">lll")
2194 hlen = struct.calcsize(">lll")
2197 oldlen = self.rawsize(baserev)
2195 oldlen = self.rawsize(baserev)
2198 newlen = len(delta) - hlen
2196 newlen = len(delta) - hlen
2199 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2197 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2200 raise error.CensoredBaseError(self.indexfile,
2198 raise error.CensoredBaseError(self.indexfile,
2201 self.node(baserev))
2199 self.node(baserev))
2202
2200
2203 if not flags and self._peek_iscensored(baserev, delta, flush):
2201 if not flags and self._peek_iscensored(baserev, delta, flush):
2204 flags |= REVIDX_ISCENSORED
2202 flags |= REVIDX_ISCENSORED
2205
2203
2206 # We assume consumers of addrevisioncb will want to retrieve
2204 # We assume consumers of addrevisioncb will want to retrieve
2207 # the added revision, which will require a call to
2205 # the added revision, which will require a call to
2208 # revision(). revision() will fast path if there is a cache
2206 # revision(). revision() will fast path if there is a cache
2209 # hit. So, we tell _addrevision() to always cache in this case.
2207 # hit. So, we tell _addrevision() to always cache in this case.
2210 # We're only using addgroup() in the context of changegroup
2208 # We're only using addgroup() in the context of changegroup
2211 # generation so the revision data can always be handled as raw
2209 # generation so the revision data can always be handled as raw
2212 # by the flagprocessor.
2210 # by the flagprocessor.
2213 self._addrevision(node, None, transaction, link,
2211 self._addrevision(node, None, transaction, link,
2214 p1, p2, flags, (baserev, delta),
2212 p1, p2, flags, (baserev, delta),
2215 ifh, dfh,
2213 ifh, dfh,
2216 alwayscache=bool(addrevisioncb),
2214 alwayscache=bool(addrevisioncb),
2217 deltacomputer=deltacomputer)
2215 deltacomputer=deltacomputer)
2218
2216
2219 if addrevisioncb:
2217 if addrevisioncb:
2220 addrevisioncb(self, node)
2218 addrevisioncb(self, node)
2221
2219
2222 if not dfh and not self._inline:
2220 if not dfh and not self._inline:
2223 # addrevision switched from inline to conventional
2221 # addrevision switched from inline to conventional
2224 # reopen the index
2222 # reopen the index
2225 ifh.close()
2223 ifh.close()
2226 dfh = self._datafp("a+")
2224 dfh = self._datafp("a+")
2227 ifh = self._indexfp("a+")
2225 ifh = self._indexfp("a+")
2228 self._writinghandles = (ifh, dfh)
2226 self._writinghandles = (ifh, dfh)
2229 finally:
2227 finally:
2230 self._writinghandles = None
2228 self._writinghandles = None
2231
2229
2232 if dfh:
2230 if dfh:
2233 dfh.close()
2231 dfh.close()
2234 ifh.close()
2232 ifh.close()
2235
2233
2236 return nodes
2234 return nodes
2237
2235
2238 def iscensored(self, rev):
2236 def iscensored(self, rev):
2239 """Check if a file revision is censored."""
2237 """Check if a file revision is censored."""
2240 if not self._censorable:
2238 if not self._censorable:
2241 return False
2239 return False
2242
2240
2243 return self.flags(rev) & REVIDX_ISCENSORED
2241 return self.flags(rev) & REVIDX_ISCENSORED
2244
2242
2245 def _peek_iscensored(self, baserev, delta, flush):
2243 def _peek_iscensored(self, baserev, delta, flush):
2246 """Quickly check if a delta produces a censored revision."""
2244 """Quickly check if a delta produces a censored revision."""
2247 if not self._censorable:
2245 if not self._censorable:
2248 return False
2246 return False
2249
2247
2250 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2248 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2251
2249
2252 def getstrippoint(self, minlink):
2250 def getstrippoint(self, minlink):
2253 """find the minimum rev that must be stripped to strip the linkrev
2251 """find the minimum rev that must be stripped to strip the linkrev
2254
2252
2255 Returns a tuple containing the minimum rev and a set of all revs that
2253 Returns a tuple containing the minimum rev and a set of all revs that
2256 have linkrevs that will be broken by this strip.
2254 have linkrevs that will be broken by this strip.
2257 """
2255 """
2258 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2256 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2259 self.headrevs(),
2257 self.headrevs(),
2260 self.linkrev, self.parentrevs)
2258 self.linkrev, self.parentrevs)
2261
2259
2262 def strip(self, minlink, transaction):
2260 def strip(self, minlink, transaction):
2263 """truncate the revlog on the first revision with a linkrev >= minlink
2261 """truncate the revlog on the first revision with a linkrev >= minlink
2264
2262
2265 This function is called when we're stripping revision minlink and
2263 This function is called when we're stripping revision minlink and
2266 its descendants from the repository.
2264 its descendants from the repository.
2267
2265
2268 We have to remove all revisions with linkrev >= minlink, because
2266 We have to remove all revisions with linkrev >= minlink, because
2269 the equivalent changelog revisions will be renumbered after the
2267 the equivalent changelog revisions will be renumbered after the
2270 strip.
2268 strip.
2271
2269
2272 So we truncate the revlog on the first of these revisions, and
2270 So we truncate the revlog on the first of these revisions, and
2273 trust that the caller has saved the revisions that shouldn't be
2271 trust that the caller has saved the revisions that shouldn't be
2274 removed and that it'll re-add them after this truncation.
2272 removed and that it'll re-add them after this truncation.
2275 """
2273 """
2276 if len(self) == 0:
2274 if len(self) == 0:
2277 return
2275 return
2278
2276
2279 rev, _ = self.getstrippoint(minlink)
2277 rev, _ = self.getstrippoint(minlink)
2280 if rev == len(self):
2278 if rev == len(self):
2281 return
2279 return
2282
2280
2283 # first truncate the files on disk
2281 # first truncate the files on disk
2284 end = self.start(rev)
2282 end = self.start(rev)
2285 if not self._inline:
2283 if not self._inline:
2286 transaction.add(self.datafile, end)
2284 transaction.add(self.datafile, end)
2287 end = rev * self._io.size
2285 end = rev * self._io.size
2288 else:
2286 else:
2289 end += rev * self._io.size
2287 end += rev * self._io.size
2290
2288
2291 transaction.add(self.indexfile, end)
2289 transaction.add(self.indexfile, end)
2292
2290
2293 # then reset internal state in memory to forget those revisions
2291 # then reset internal state in memory to forget those revisions
2294 self._revisioncache = None
2292 self._revisioncache = None
2295 self._chaininfocache = {}
2293 self._chaininfocache = {}
2296 self._chunkclear()
2294 self._chunkclear()
2297 for x in pycompat.xrange(rev, len(self)):
2295 for x in pycompat.xrange(rev, len(self)):
2298 del self.nodemap[self.node(x)]
2296 del self.nodemap[self.node(x)]
2299
2297
2300 del self.index[rev:-1]
2298 del self.index[rev:-1]
2301 self._nodepos = None
2299 self._nodepos = None
2302
2300
2303 def checksize(self):
2301 def checksize(self):
2304 """Check size of index and data files
2302 """Check size of index and data files
2305
2303
2306 return a (dd, di) tuple.
2304 return a (dd, di) tuple.
2307 - dd: extra bytes for the "data" file
2305 - dd: extra bytes for the "data" file
2308 - di: extra bytes for the "index" file
2306 - di: extra bytes for the "index" file
2309
2307
2310 A healthy revlog will return (0, 0).
2308 A healthy revlog will return (0, 0).
2311 """
2309 """
2312 expected = 0
2310 expected = 0
2313 if len(self):
2311 if len(self):
2314 expected = max(0, self.end(len(self) - 1))
2312 expected = max(0, self.end(len(self) - 1))
2315
2313
2316 try:
2314 try:
2317 with self._datafp() as f:
2315 with self._datafp() as f:
2318 f.seek(0, io.SEEK_END)
2316 f.seek(0, io.SEEK_END)
2319 actual = f.tell()
2317 actual = f.tell()
2320 dd = actual - expected
2318 dd = actual - expected
2321 except IOError as inst:
2319 except IOError as inst:
2322 if inst.errno != errno.ENOENT:
2320 if inst.errno != errno.ENOENT:
2323 raise
2321 raise
2324 dd = 0
2322 dd = 0
2325
2323
2326 try:
2324 try:
2327 f = self.opener(self.indexfile)
2325 f = self.opener(self.indexfile)
2328 f.seek(0, io.SEEK_END)
2326 f.seek(0, io.SEEK_END)
2329 actual = f.tell()
2327 actual = f.tell()
2330 f.close()
2328 f.close()
2331 s = self._io.size
2329 s = self._io.size
2332 i = max(0, actual // s)
2330 i = max(0, actual // s)
2333 di = actual - (i * s)
2331 di = actual - (i * s)
2334 if self._inline:
2332 if self._inline:
2335 databytes = 0
2333 databytes = 0
2336 for r in self:
2334 for r in self:
2337 databytes += max(0, self.length(r))
2335 databytes += max(0, self.length(r))
2338 dd = 0
2336 dd = 0
2339 di = actual - len(self) * s - databytes
2337 di = actual - len(self) * s - databytes
2340 except IOError as inst:
2338 except IOError as inst:
2341 if inst.errno != errno.ENOENT:
2339 if inst.errno != errno.ENOENT:
2342 raise
2340 raise
2343 di = 0
2341 di = 0
2344
2342
2345 return (dd, di)
2343 return (dd, di)
2346
2344
2347 def files(self):
2345 def files(self):
2348 res = [self.indexfile]
2346 res = [self.indexfile]
2349 if not self._inline:
2347 if not self._inline:
2350 res.append(self.datafile)
2348 res.append(self.datafile)
2351 return res
2349 return res
2352
2350
2353 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2351 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2354 assumehaveparentrevisions=False,
2352 assumehaveparentrevisions=False,
2355 deltamode=repository.CG_DELTAMODE_STD):
2353 deltamode=repository.CG_DELTAMODE_STD):
2356 if nodesorder not in ('nodes', 'storage', 'linear', None):
2354 if nodesorder not in ('nodes', 'storage', 'linear', None):
2357 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2355 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2358 nodesorder)
2356 nodesorder)
2359
2357
2360 if nodesorder is None and not self._generaldelta:
2358 if nodesorder is None and not self._generaldelta:
2361 nodesorder = 'storage'
2359 nodesorder = 'storage'
2362
2360
2363 if (not self._storedeltachains and
2361 if (not self._storedeltachains and
2364 deltamode != repository.CG_DELTAMODE_PREV):
2362 deltamode != repository.CG_DELTAMODE_PREV):
2365 deltamode = repository.CG_DELTAMODE_FULL
2363 deltamode = repository.CG_DELTAMODE_FULL
2366
2364
2367 return storageutil.emitrevisions(
2365 return storageutil.emitrevisions(
2368 self, nodes, nodesorder, revlogrevisiondelta,
2366 self, nodes, nodesorder, revlogrevisiondelta,
2369 deltaparentfn=self.deltaparent,
2367 deltaparentfn=self.deltaparent,
2370 candeltafn=self.candelta,
2368 candeltafn=self.candelta,
2371 rawsizefn=self.rawsize,
2369 rawsizefn=self.rawsize,
2372 revdifffn=self.revdiff,
2370 revdifffn=self.revdiff,
2373 flagsfn=self.flags,
2371 flagsfn=self.flags,
2374 deltamode=deltamode,
2372 deltamode=deltamode,
2375 revisiondata=revisiondata,
2373 revisiondata=revisiondata,
2376 assumehaveparentrevisions=assumehaveparentrevisions)
2374 assumehaveparentrevisions=assumehaveparentrevisions)
2377
2375
2378 DELTAREUSEALWAYS = 'always'
2376 DELTAREUSEALWAYS = 'always'
2379 DELTAREUSESAMEREVS = 'samerevs'
2377 DELTAREUSESAMEREVS = 'samerevs'
2380 DELTAREUSENEVER = 'never'
2378 DELTAREUSENEVER = 'never'
2381
2379
2382 DELTAREUSEFULLADD = 'fulladd'
2380 DELTAREUSEFULLADD = 'fulladd'
2383
2381
2384 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2382 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2385
2383
2386 def clone(self, tr, destrevlog, addrevisioncb=None,
2384 def clone(self, tr, destrevlog, addrevisioncb=None,
2387 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2385 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2388 """Copy this revlog to another, possibly with format changes.
2386 """Copy this revlog to another, possibly with format changes.
2389
2387
2390 The destination revlog will contain the same revisions and nodes.
2388 The destination revlog will contain the same revisions and nodes.
2391 However, it may not be bit-for-bit identical due to e.g. delta encoding
2389 However, it may not be bit-for-bit identical due to e.g. delta encoding
2392 differences.
2390 differences.
2393
2391
2394 The ``deltareuse`` argument control how deltas from the existing revlog
2392 The ``deltareuse`` argument control how deltas from the existing revlog
2395 are preserved in the destination revlog. The argument can have the
2393 are preserved in the destination revlog. The argument can have the
2396 following values:
2394 following values:
2397
2395
2398 DELTAREUSEALWAYS
2396 DELTAREUSEALWAYS
2399 Deltas will always be reused (if possible), even if the destination
2397 Deltas will always be reused (if possible), even if the destination
2400 revlog would not select the same revisions for the delta. This is the
2398 revlog would not select the same revisions for the delta. This is the
2401 fastest mode of operation.
2399 fastest mode of operation.
2402 DELTAREUSESAMEREVS
2400 DELTAREUSESAMEREVS
2403 Deltas will be reused if the destination revlog would pick the same
2401 Deltas will be reused if the destination revlog would pick the same
2404 revisions for the delta. This mode strikes a balance between speed
2402 revisions for the delta. This mode strikes a balance between speed
2405 and optimization.
2403 and optimization.
2406 DELTAREUSENEVER
2404 DELTAREUSENEVER
2407 Deltas will never be reused. This is the slowest mode of execution.
2405 Deltas will never be reused. This is the slowest mode of execution.
2408 This mode can be used to recompute deltas (e.g. if the diff/delta
2406 This mode can be used to recompute deltas (e.g. if the diff/delta
2409 algorithm changes).
2407 algorithm changes).
2410
2408
2411 Delta computation can be slow, so the choice of delta reuse policy can
2409 Delta computation can be slow, so the choice of delta reuse policy can
2412 significantly affect run time.
2410 significantly affect run time.
2413
2411
2414 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2412 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2415 two extremes. Deltas will be reused if they are appropriate. But if the
2413 two extremes. Deltas will be reused if they are appropriate. But if the
2416 delta could choose a better revision, it will do so. This means if you
2414 delta could choose a better revision, it will do so. This means if you
2417 are converting a non-generaldelta revlog to a generaldelta revlog,
2415 are converting a non-generaldelta revlog to a generaldelta revlog,
2418 deltas will be recomputed if the delta's parent isn't a parent of the
2416 deltas will be recomputed if the delta's parent isn't a parent of the
2419 revision.
2417 revision.
2420
2418
2421 In addition to the delta policy, the ``forcedeltabothparents``
2419 In addition to the delta policy, the ``forcedeltabothparents``
2422 argument controls whether to force compute deltas against both parents
2420 argument controls whether to force compute deltas against both parents
2423 for merges. By default, the current default is used.
2421 for merges. By default, the current default is used.
2424 """
2422 """
2425 if deltareuse not in self.DELTAREUSEALL:
2423 if deltareuse not in self.DELTAREUSEALL:
2426 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2424 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2427
2425
2428 if len(destrevlog):
2426 if len(destrevlog):
2429 raise ValueError(_('destination revlog is not empty'))
2427 raise ValueError(_('destination revlog is not empty'))
2430
2428
2431 if getattr(self, 'filteredrevs', None):
2429 if getattr(self, 'filteredrevs', None):
2432 raise ValueError(_('source revlog has filtered revisions'))
2430 raise ValueError(_('source revlog has filtered revisions'))
2433 if getattr(destrevlog, 'filteredrevs', None):
2431 if getattr(destrevlog, 'filteredrevs', None):
2434 raise ValueError(_('destination revlog has filtered revisions'))
2432 raise ValueError(_('destination revlog has filtered revisions'))
2435
2433
2436 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2434 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2437 # if possible.
2435 # if possible.
2438 oldlazydelta = destrevlog._lazydelta
2436 oldlazydelta = destrevlog._lazydelta
2439 oldlazydeltabase = destrevlog._lazydeltabase
2437 oldlazydeltabase = destrevlog._lazydeltabase
2440 oldamd = destrevlog._deltabothparents
2438 oldamd = destrevlog._deltabothparents
2441
2439
2442 try:
2440 try:
2443 if deltareuse == self.DELTAREUSEALWAYS:
2441 if deltareuse == self.DELTAREUSEALWAYS:
2444 destrevlog._lazydeltabase = True
2442 destrevlog._lazydeltabase = True
2445 destrevlog._lazydelta = True
2443 destrevlog._lazydelta = True
2446 elif deltareuse == self.DELTAREUSESAMEREVS:
2444 elif deltareuse == self.DELTAREUSESAMEREVS:
2447 destrevlog._lazydeltabase = False
2445 destrevlog._lazydeltabase = False
2448 destrevlog._lazydelta = True
2446 destrevlog._lazydelta = True
2449 elif deltareuse == self.DELTAREUSENEVER:
2447 elif deltareuse == self.DELTAREUSENEVER:
2450 destrevlog._lazydeltabase = False
2448 destrevlog._lazydeltabase = False
2451 destrevlog._lazydelta = False
2449 destrevlog._lazydelta = False
2452
2450
2453 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2451 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2454
2452
2455 deltacomputer = deltautil.deltacomputer(destrevlog)
2453 deltacomputer = deltautil.deltacomputer(destrevlog)
2456 index = self.index
2454 index = self.index
2457 for rev in self:
2455 for rev in self:
2458 entry = index[rev]
2456 entry = index[rev]
2459
2457
2460 # Some classes override linkrev to take filtered revs into
2458 # Some classes override linkrev to take filtered revs into
2461 # account. Use raw entry from index.
2459 # account. Use raw entry from index.
2462 flags = entry[0] & 0xffff
2460 flags = entry[0] & 0xffff
2463 linkrev = entry[4]
2461 linkrev = entry[4]
2464 p1 = index[entry[5]][7]
2462 p1 = index[entry[5]][7]
2465 p2 = index[entry[6]][7]
2463 p2 = index[entry[6]][7]
2466 node = entry[7]
2464 node = entry[7]
2467
2465
2468 # (Possibly) reuse the delta from the revlog if allowed and
2466 # (Possibly) reuse the delta from the revlog if allowed and
2469 # the revlog chunk is a delta.
2467 # the revlog chunk is a delta.
2470 cachedelta = None
2468 cachedelta = None
2471 rawtext = None
2469 rawtext = None
2472 if (deltareuse != self.DELTAREUSEFULLADD
2470 if (deltareuse != self.DELTAREUSEFULLADD
2473 and destrevlog._lazydelta):
2471 and destrevlog._lazydelta):
2474 dp = self.deltaparent(rev)
2472 dp = self.deltaparent(rev)
2475 if dp != nullrev:
2473 if dp != nullrev:
2476 cachedelta = (dp, bytes(self._chunk(rev)))
2474 cachedelta = (dp, bytes(self._chunk(rev)))
2477
2475
2478 if not cachedelta:
2476 if not cachedelta:
2479 rawtext = self.revision(rev, raw=True)
2477 rawtext = self.revision(rev, raw=True)
2480
2478
2481
2479
2482 if deltareuse == self.DELTAREUSEFULLADD:
2480 if deltareuse == self.DELTAREUSEFULLADD:
2483 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2481 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2484 cachedelta=cachedelta,
2482 cachedelta=cachedelta,
2485 node=node, flags=flags,
2483 node=node, flags=flags,
2486 deltacomputer=deltacomputer)
2484 deltacomputer=deltacomputer)
2487 else:
2485 else:
2488 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2486 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2489 checkambig=False)
2487 checkambig=False)
2490 dfh = None
2488 dfh = None
2491 if not destrevlog._inline:
2489 if not destrevlog._inline:
2492 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2490 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2493 try:
2491 try:
2494 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2492 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2495 p2, flags, cachedelta, ifh, dfh,
2493 p2, flags, cachedelta, ifh, dfh,
2496 deltacomputer=deltacomputer)
2494 deltacomputer=deltacomputer)
2497 finally:
2495 finally:
2498 if dfh:
2496 if dfh:
2499 dfh.close()
2497 dfh.close()
2500 ifh.close()
2498 ifh.close()
2501
2499
2502 if addrevisioncb:
2500 if addrevisioncb:
2503 addrevisioncb(self, rev, node)
2501 addrevisioncb(self, rev, node)
2504 finally:
2502 finally:
2505 destrevlog._lazydelta = oldlazydelta
2503 destrevlog._lazydelta = oldlazydelta
2506 destrevlog._lazydeltabase = oldlazydeltabase
2504 destrevlog._lazydeltabase = oldlazydeltabase
2507 destrevlog._deltabothparents = oldamd
2505 destrevlog._deltabothparents = oldamd
2508
2506
2509 def censorrevision(self, tr, censornode, tombstone=b''):
2507 def censorrevision(self, tr, censornode, tombstone=b''):
2510 if (self.version & 0xFFFF) == REVLOGV0:
2508 if (self.version & 0xFFFF) == REVLOGV0:
2511 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2509 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2512 self.version)
2510 self.version)
2513
2511
2514 censorrev = self.rev(censornode)
2512 censorrev = self.rev(censornode)
2515 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2513 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2516
2514
2517 if len(tombstone) > self.rawsize(censorrev):
2515 if len(tombstone) > self.rawsize(censorrev):
2518 raise error.Abort(_('censor tombstone must be no longer than '
2516 raise error.Abort(_('censor tombstone must be no longer than '
2519 'censored data'))
2517 'censored data'))
2520
2518
2521 # Rewriting the revlog in place is hard. Our strategy for censoring is
2519 # Rewriting the revlog in place is hard. Our strategy for censoring is
2522 # to create a new revlog, copy all revisions to it, then replace the
2520 # to create a new revlog, copy all revisions to it, then replace the
2523 # revlogs on transaction close.
2521 # revlogs on transaction close.
2524
2522
2525 newindexfile = self.indexfile + b'.tmpcensored'
2523 newindexfile = self.indexfile + b'.tmpcensored'
2526 newdatafile = self.datafile + b'.tmpcensored'
2524 newdatafile = self.datafile + b'.tmpcensored'
2527
2525
2528 # This is a bit dangerous. We could easily have a mismatch of state.
2526 # This is a bit dangerous. We could easily have a mismatch of state.
2529 newrl = revlog(self.opener, newindexfile, newdatafile,
2527 newrl = revlog(self.opener, newindexfile, newdatafile,
2530 censorable=True)
2528 censorable=True)
2531 newrl.version = self.version
2529 newrl.version = self.version
2532 newrl._generaldelta = self._generaldelta
2530 newrl._generaldelta = self._generaldelta
2533 newrl._io = self._io
2531 newrl._io = self._io
2534
2532
2535 for rev in self.revs():
2533 for rev in self.revs():
2536 node = self.node(rev)
2534 node = self.node(rev)
2537 p1, p2 = self.parents(node)
2535 p1, p2 = self.parents(node)
2538
2536
2539 if rev == censorrev:
2537 if rev == censorrev:
2540 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2538 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2541 p1, p2, censornode, REVIDX_ISCENSORED)
2539 p1, p2, censornode, REVIDX_ISCENSORED)
2542
2540
2543 if newrl.deltaparent(rev) != nullrev:
2541 if newrl.deltaparent(rev) != nullrev:
2544 raise error.Abort(_('censored revision stored as delta; '
2542 raise error.Abort(_('censored revision stored as delta; '
2545 'cannot censor'),
2543 'cannot censor'),
2546 hint=_('censoring of revlogs is not '
2544 hint=_('censoring of revlogs is not '
2547 'fully implemented; please report '
2545 'fully implemented; please report '
2548 'this bug'))
2546 'this bug'))
2549 continue
2547 continue
2550
2548
2551 if self.iscensored(rev):
2549 if self.iscensored(rev):
2552 if self.deltaparent(rev) != nullrev:
2550 if self.deltaparent(rev) != nullrev:
2553 raise error.Abort(_('cannot censor due to censored '
2551 raise error.Abort(_('cannot censor due to censored '
2554 'revision having delta stored'))
2552 'revision having delta stored'))
2555 rawtext = self._chunk(rev)
2553 rawtext = self._chunk(rev)
2556 else:
2554 else:
2557 rawtext = self.revision(rev, raw=True)
2555 rawtext = self.revision(rev, raw=True)
2558
2556
2559 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2557 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2560 self.flags(rev))
2558 self.flags(rev))
2561
2559
2562 tr.addbackup(self.indexfile, location='store')
2560 tr.addbackup(self.indexfile, location='store')
2563 if not self._inline:
2561 if not self._inline:
2564 tr.addbackup(self.datafile, location='store')
2562 tr.addbackup(self.datafile, location='store')
2565
2563
2566 self.opener.rename(newrl.indexfile, self.indexfile)
2564 self.opener.rename(newrl.indexfile, self.indexfile)
2567 if not self._inline:
2565 if not self._inline:
2568 self.opener.rename(newrl.datafile, self.datafile)
2566 self.opener.rename(newrl.datafile, self.datafile)
2569
2567
2570 self.clearcaches()
2568 self.clearcaches()
2571 self._loadindex()
2569 self._loadindex()
2572
2570
2573 def verifyintegrity(self, state):
2571 def verifyintegrity(self, state):
2574 """Verifies the integrity of the revlog.
2572 """Verifies the integrity of the revlog.
2575
2573
2576 Yields ``revlogproblem`` instances describing problems that are
2574 Yields ``revlogproblem`` instances describing problems that are
2577 found.
2575 found.
2578 """
2576 """
2579 dd, di = self.checksize()
2577 dd, di = self.checksize()
2580 if dd:
2578 if dd:
2581 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2579 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2582 if di:
2580 if di:
2583 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2581 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2584
2582
2585 version = self.version & 0xFFFF
2583 version = self.version & 0xFFFF
2586
2584
2587 # The verifier tells us what version revlog we should be.
2585 # The verifier tells us what version revlog we should be.
2588 if version != state['expectedversion']:
2586 if version != state['expectedversion']:
2589 yield revlogproblem(
2587 yield revlogproblem(
2590 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2588 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2591 (self.indexfile, version, state['expectedversion']))
2589 (self.indexfile, version, state['expectedversion']))
2592
2590
2593 state['skipread'] = set()
2591 state['skipread'] = set()
2594
2592
2595 for rev in self:
2593 for rev in self:
2596 node = self.node(rev)
2594 node = self.node(rev)
2597
2595
2598 # Verify contents. 4 cases to care about:
2596 # Verify contents. 4 cases to care about:
2599 #
2597 #
2600 # common: the most common case
2598 # common: the most common case
2601 # rename: with a rename
2599 # rename: with a rename
2602 # meta: file content starts with b'\1\n', the metadata
2600 # meta: file content starts with b'\1\n', the metadata
2603 # header defined in filelog.py, but without a rename
2601 # header defined in filelog.py, but without a rename
2604 # ext: content stored externally
2602 # ext: content stored externally
2605 #
2603 #
2606 # More formally, their differences are shown below:
2604 # More formally, their differences are shown below:
2607 #
2605 #
2608 # | common | rename | meta | ext
2606 # | common | rename | meta | ext
2609 # -------------------------------------------------------
2607 # -------------------------------------------------------
2610 # flags() | 0 | 0 | 0 | not 0
2608 # flags() | 0 | 0 | 0 | not 0
2611 # renamed() | False | True | False | ?
2609 # renamed() | False | True | False | ?
2612 # rawtext[0:2]=='\1\n'| False | True | True | ?
2610 # rawtext[0:2]=='\1\n'| False | True | True | ?
2613 #
2611 #
2614 # "rawtext" means the raw text stored in revlog data, which
2612 # "rawtext" means the raw text stored in revlog data, which
2615 # could be retrieved by "revision(rev, raw=True)". "text"
2613 # could be retrieved by "revision(rev, raw=True)". "text"
2616 # mentioned below is "revision(rev, raw=False)".
2614 # mentioned below is "revision(rev, raw=False)".
2617 #
2615 #
2618 # There are 3 different lengths stored physically:
2616 # There are 3 different lengths stored physically:
2619 # 1. L1: rawsize, stored in revlog index
2617 # 1. L1: rawsize, stored in revlog index
2620 # 2. L2: len(rawtext), stored in revlog data
2618 # 2. L2: len(rawtext), stored in revlog data
2621 # 3. L3: len(text), stored in revlog data if flags==0, or
2619 # 3. L3: len(text), stored in revlog data if flags==0, or
2622 # possibly somewhere else if flags!=0
2620 # possibly somewhere else if flags!=0
2623 #
2621 #
2624 # L1 should be equal to L2. L3 could be different from them.
2622 # L1 should be equal to L2. L3 could be different from them.
2625 # "text" may or may not affect commit hash depending on flag
2623 # "text" may or may not affect commit hash depending on flag
2626 # processors (see revlog.addflagprocessor).
2624 # processors (see revlog.addflagprocessor).
2627 #
2625 #
2628 # | common | rename | meta | ext
2626 # | common | rename | meta | ext
2629 # -------------------------------------------------
2627 # -------------------------------------------------
2630 # rawsize() | L1 | L1 | L1 | L1
2628 # rawsize() | L1 | L1 | L1 | L1
2631 # size() | L1 | L2-LM | L1(*) | L1 (?)
2629 # size() | L1 | L2-LM | L1(*) | L1 (?)
2632 # len(rawtext) | L2 | L2 | L2 | L2
2630 # len(rawtext) | L2 | L2 | L2 | L2
2633 # len(text) | L2 | L2 | L2 | L3
2631 # len(text) | L2 | L2 | L2 | L3
2634 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2632 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2635 #
2633 #
2636 # LM: length of metadata, depending on rawtext
2634 # LM: length of metadata, depending on rawtext
2637 # (*): not ideal, see comment in filelog.size
2635 # (*): not ideal, see comment in filelog.size
2638 # (?): could be "- len(meta)" if the resolved content has
2636 # (?): could be "- len(meta)" if the resolved content has
2639 # rename metadata
2637 # rename metadata
2640 #
2638 #
2641 # Checks needed to be done:
2639 # Checks needed to be done:
2642 # 1. length check: L1 == L2, in all cases.
2640 # 1. length check: L1 == L2, in all cases.
2643 # 2. hash check: depending on flag processor, we may need to
2641 # 2. hash check: depending on flag processor, we may need to
2644 # use either "text" (external), or "rawtext" (in revlog).
2642 # use either "text" (external), or "rawtext" (in revlog).
2645
2643
2646 try:
2644 try:
2647 skipflags = state.get('skipflags', 0)
2645 skipflags = state.get('skipflags', 0)
2648 if skipflags:
2646 if skipflags:
2649 skipflags &= self.flags(rev)
2647 skipflags &= self.flags(rev)
2650
2648
2651 if skipflags:
2649 if skipflags:
2652 state['skipread'].add(node)
2650 state['skipread'].add(node)
2653 else:
2651 else:
2654 # Side-effect: read content and verify hash.
2652 # Side-effect: read content and verify hash.
2655 self.revision(node)
2653 self.revision(node)
2656
2654
2657 l1 = self.rawsize(rev)
2655 l1 = self.rawsize(rev)
2658 l2 = len(self.revision(node, raw=True))
2656 l2 = len(self.revision(node, raw=True))
2659
2657
2660 if l1 != l2:
2658 if l1 != l2:
2661 yield revlogproblem(
2659 yield revlogproblem(
2662 error=_('unpacked size is %d, %d expected') % (l2, l1),
2660 error=_('unpacked size is %d, %d expected') % (l2, l1),
2663 node=node)
2661 node=node)
2664
2662
2665 except error.CensoredNodeError:
2663 except error.CensoredNodeError:
2666 if state['erroroncensored']:
2664 if state['erroroncensored']:
2667 yield revlogproblem(error=_('censored file data'),
2665 yield revlogproblem(error=_('censored file data'),
2668 node=node)
2666 node=node)
2669 state['skipread'].add(node)
2667 state['skipread'].add(node)
2670 except Exception as e:
2668 except Exception as e:
2671 yield revlogproblem(
2669 yield revlogproblem(
2672 error=_('unpacking %s: %s') % (short(node),
2670 error=_('unpacking %s: %s') % (short(node),
2673 stringutil.forcebytestr(e)),
2671 stringutil.forcebytestr(e)),
2674 node=node)
2672 node=node)
2675 state['skipread'].add(node)
2673 state['skipread'].add(node)
2676
2674
2677 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2675 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2678 revisionscount=False, trackedsize=False,
2676 revisionscount=False, trackedsize=False,
2679 storedsize=False):
2677 storedsize=False):
2680 d = {}
2678 d = {}
2681
2679
2682 if exclusivefiles:
2680 if exclusivefiles:
2683 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2681 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2684 if not self._inline:
2682 if not self._inline:
2685 d['exclusivefiles'].append((self.opener, self.datafile))
2683 d['exclusivefiles'].append((self.opener, self.datafile))
2686
2684
2687 if sharedfiles:
2685 if sharedfiles:
2688 d['sharedfiles'] = []
2686 d['sharedfiles'] = []
2689
2687
2690 if revisionscount:
2688 if revisionscount:
2691 d['revisionscount'] = len(self)
2689 d['revisionscount'] = len(self)
2692
2690
2693 if trackedsize:
2691 if trackedsize:
2694 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2692 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2695
2693
2696 if storedsize:
2694 if storedsize:
2697 d['storedsize'] = sum(self.opener.stat(path).st_size
2695 d['storedsize'] = sum(self.opener.stat(path).st_size
2698 for path in self.files())
2696 for path in self.files())
2699
2697
2700 return d
2698 return d
@@ -1,56 +1,55 b''
1 # revlogdeltas.py - constant used for revlog logic
1 # revlogdeltas.py - constant used for revlog logic
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 from .. import (
12 from .. import (
13 repository,
13 repository,
14 util,
15 )
14 )
16
15
17 # revlog header flags
16 # revlog header flags
18 REVLOGV0 = 0
17 REVLOGV0 = 0
19 REVLOGV1 = 1
18 REVLOGV1 = 1
20 # Dummy value until file format is finalized.
19 # Dummy value until file format is finalized.
21 # Reminder: change the bounds check in revlog.__init__ when this is changed.
20 # Reminder: change the bounds check in revlog.__init__ when this is changed.
22 REVLOGV2 = 0xDEAD
21 REVLOGV2 = 0xDEAD
23 # Shared across v1 and v2.
22 # Shared across v1 and v2.
24 FLAG_INLINE_DATA = (1 << 16)
23 FLAG_INLINE_DATA = (1 << 16)
25 # Only used by v1, implied by v2.
24 # Only used by v1, implied by v2.
26 FLAG_GENERALDELTA = (1 << 17)
25 FLAG_GENERALDELTA = (1 << 17)
27 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
26 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
28 REVLOG_DEFAULT_FORMAT = REVLOGV1
27 REVLOG_DEFAULT_FORMAT = REVLOGV1
29 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
29 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
31 REVLOGV2_FLAGS = FLAG_INLINE_DATA
30 REVLOGV2_FLAGS = FLAG_INLINE_DATA
32
31
33 # revlog index flags
32 # revlog index flags
34
33
35 # For historical reasons, revlog's internal flags were exposed via the
34 # For historical reasons, revlog's internal flags were exposed via the
36 # wire protocol and are even exposed in parts of the storage APIs.
35 # wire protocol and are even exposed in parts of the storage APIs.
37
36
38 # revision has censor metadata, must be verified
37 # revision has censor metadata, must be verified
39 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
38 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
40 # revision hash does not match data (narrowhg)
39 # revision hash does not match data (narrowhg)
41 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
40 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
42 # revision data is stored externally
41 # revision data is stored externally
43 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
42 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
44 REVIDX_DEFAULT_FLAGS = 0
43 REVIDX_DEFAULT_FLAGS = 0
45 # stable order in which flags need to be processed and their processors applied
44 # stable order in which flags need to be processed and their processors applied
46 REVIDX_FLAGS_ORDER = [
45 REVIDX_FLAGS_ORDER = [
47 REVIDX_ISCENSORED,
46 REVIDX_ISCENSORED,
48 REVIDX_ELLIPSIS,
47 REVIDX_ELLIPSIS,
49 REVIDX_EXTSTORED,
48 REVIDX_EXTSTORED,
50 ]
49 ]
51 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
50
52 # bitmark for flags that could cause rawdata content change
51 # bitmark for flags that could cause rawdata content change
53 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
52 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
54
53
55 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
54 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
56
55
@@ -1,35 +1,39 b''
1 # flagutils.py - code to deal with revlog flags and their processors
1 # flagutils.py - code to deal with revlog flags and their processors
2 #
2 #
3 # Copyright 2016 Remi Chaintron <remi@fb.com>
3 # Copyright 2016 Remi Chaintron <remi@fb.com>
4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 from .constants import (
11 from .constants import (
12 REVIDX_DEFAULT_FLAGS,
12 REVIDX_DEFAULT_FLAGS,
13 REVIDX_ELLIPSIS,
13 REVIDX_ELLIPSIS,
14 REVIDX_EXTSTORED,
14 REVIDX_EXTSTORED,
15 REVIDX_FLAGS_ORDER,
15 REVIDX_FLAGS_ORDER,
16 REVIDX_ISCENSORED,
16 REVIDX_ISCENSORED,
17 REVIDX_KNOWN_FLAGS,
18 REVIDX_RAWTEXT_CHANGING_FLAGS,
17 REVIDX_RAWTEXT_CHANGING_FLAGS,
19 )
18 )
20
19
20 from .. import (
21 util
22 )
23
21 # blanked usage of all the name to prevent pyflakes constraints
24 # blanked usage of all the name to prevent pyflakes constraints
22 # We need these name available in the module for extensions.
25 # We need these name available in the module for extensions.
23 REVIDX_ISCENSORED
26 REVIDX_ISCENSORED
24 REVIDX_ELLIPSIS
27 REVIDX_ELLIPSIS
25 REVIDX_EXTSTORED
28 REVIDX_EXTSTORED
26 REVIDX_DEFAULT_FLAGS
29 REVIDX_DEFAULT_FLAGS
27 REVIDX_FLAGS_ORDER
30 REVIDX_FLAGS_ORDER
28 REVIDX_KNOWN_FLAGS
29 REVIDX_RAWTEXT_CHANGING_FLAGS
31 REVIDX_RAWTEXT_CHANGING_FLAGS
30
32
33 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
34
31 # Store flag processors (cf. 'addflagprocessor()' to register)
35 # Store flag processors (cf. 'addflagprocessor()' to register)
32 flagprocessors = {
36 flagprocessors = {
33 REVIDX_ISCENSORED: None,
37 REVIDX_ISCENSORED: None,
34 }
38 }
35
39
@@ -1,136 +1,139 b''
1 # coding=UTF-8
1 # coding=UTF-8
2
2
3 from __future__ import absolute_import
3 from __future__ import absolute_import
4
4
5 import base64
5 import base64
6 import zlib
6 import zlib
7
7
8 from mercurial import (
8 from mercurial import (
9 changegroup,
9 changegroup,
10 exchange,
10 exchange,
11 extensions,
11 extensions,
12 revlog,
12 revlog,
13 util,
13 util,
14 )
14 )
15 from mercurial.revlogutils import (
16 flagutil,
17 )
15
18
16 # Test only: These flags are defined here only in the context of testing the
19 # Test only: These flags are defined here only in the context of testing the
17 # behavior of the flag processor. The canonical way to add flags is to get in
20 # behavior of the flag processor. The canonical way to add flags is to get in
18 # touch with the community and make them known in revlog.
21 # touch with the community and make them known in revlog.
19 REVIDX_NOOP = (1 << 3)
22 REVIDX_NOOP = (1 << 3)
20 REVIDX_BASE64 = (1 << 2)
23 REVIDX_BASE64 = (1 << 2)
21 REVIDX_GZIP = (1 << 1)
24 REVIDX_GZIP = (1 << 1)
22 REVIDX_FAIL = 1
25 REVIDX_FAIL = 1
23
26
24 def validatehash(self, text):
27 def validatehash(self, text):
25 return True
28 return True
26
29
27 def bypass(self, text):
30 def bypass(self, text):
28 return False
31 return False
29
32
30 def noopdonothing(self, text):
33 def noopdonothing(self, text):
31 return (text, True)
34 return (text, True)
32
35
33 def b64encode(self, text):
36 def b64encode(self, text):
34 return (base64.b64encode(text), False)
37 return (base64.b64encode(text), False)
35
38
36 def b64decode(self, text):
39 def b64decode(self, text):
37 return (base64.b64decode(text), True)
40 return (base64.b64decode(text), True)
38
41
39 def gzipcompress(self, text):
42 def gzipcompress(self, text):
40 return (zlib.compress(text), False)
43 return (zlib.compress(text), False)
41
44
42 def gzipdecompress(self, text):
45 def gzipdecompress(self, text):
43 return (zlib.decompress(text), True)
46 return (zlib.decompress(text), True)
44
47
45 def supportedoutgoingversions(orig, repo):
48 def supportedoutgoingversions(orig, repo):
46 versions = orig(repo)
49 versions = orig(repo)
47 versions.discard(b'01')
50 versions.discard(b'01')
48 versions.discard(b'02')
51 versions.discard(b'02')
49 versions.add(b'03')
52 versions.add(b'03')
50 return versions
53 return versions
51
54
52 def allsupportedversions(orig, ui):
55 def allsupportedversions(orig, ui):
53 versions = orig(ui)
56 versions = orig(ui)
54 versions.add(b'03')
57 versions.add(b'03')
55 return versions
58 return versions
56
59
57 def makewrappedfile(obj):
60 def makewrappedfile(obj):
58 class wrappedfile(obj.__class__):
61 class wrappedfile(obj.__class__):
59 def addrevision(self, text, transaction, link, p1, p2,
62 def addrevision(self, text, transaction, link, p1, p2,
60 cachedelta=None, node=None,
63 cachedelta=None, node=None,
61 flags=revlog.REVIDX_DEFAULT_FLAGS):
64 flags=flagutil.REVIDX_DEFAULT_FLAGS):
62 if b'[NOOP]' in text:
65 if b'[NOOP]' in text:
63 flags |= REVIDX_NOOP
66 flags |= REVIDX_NOOP
64
67
65 if b'[BASE64]' in text:
68 if b'[BASE64]' in text:
66 flags |= REVIDX_BASE64
69 flags |= REVIDX_BASE64
67
70
68 if b'[GZIP]' in text:
71 if b'[GZIP]' in text:
69 flags |= REVIDX_GZIP
72 flags |= REVIDX_GZIP
70
73
71 # This addrevision wrapper is meant to add a flag we will not have
74 # This addrevision wrapper is meant to add a flag we will not have
72 # transforms registered for, ensuring we handle this error case.
75 # transforms registered for, ensuring we handle this error case.
73 if b'[FAIL]' in text:
76 if b'[FAIL]' in text:
74 flags |= REVIDX_FAIL
77 flags |= REVIDX_FAIL
75
78
76 return super(wrappedfile, self).addrevision(text, transaction, link,
79 return super(wrappedfile, self).addrevision(text, transaction, link,
77 p1, p2,
80 p1, p2,
78 cachedelta=cachedelta,
81 cachedelta=cachedelta,
79 node=node,
82 node=node,
80 flags=flags)
83 flags=flags)
81
84
82 obj.__class__ = wrappedfile
85 obj.__class__ = wrappedfile
83
86
84 def reposetup(ui, repo):
87 def reposetup(ui, repo):
85 class wrappingflagprocessorrepo(repo.__class__):
88 class wrappingflagprocessorrepo(repo.__class__):
86 def file(self, f):
89 def file(self, f):
87 orig = super(wrappingflagprocessorrepo, self).file(f)
90 orig = super(wrappingflagprocessorrepo, self).file(f)
88 makewrappedfile(orig)
91 makewrappedfile(orig)
89 return orig
92 return orig
90
93
91 repo.__class__ = wrappingflagprocessorrepo
94 repo.__class__ = wrappingflagprocessorrepo
92
95
93 def extsetup(ui):
96 def extsetup(ui):
94 # Enable changegroup3 for flags to be sent over the wire
97 # Enable changegroup3 for flags to be sent over the wire
95 wrapfunction = extensions.wrapfunction
98 wrapfunction = extensions.wrapfunction
96 wrapfunction(changegroup,
99 wrapfunction(changegroup,
97 'supportedoutgoingversions',
100 'supportedoutgoingversions',
98 supportedoutgoingversions)
101 supportedoutgoingversions)
99 wrapfunction(changegroup,
102 wrapfunction(changegroup,
100 'allsupportedversions',
103 'allsupportedversions',
101 allsupportedversions)
104 allsupportedversions)
102
105
103 # Teach revlog about our test flags
106 # Teach revlog about our test flags
104 flags = [REVIDX_NOOP, REVIDX_BASE64, REVIDX_GZIP, REVIDX_FAIL]
107 flags = [REVIDX_NOOP, REVIDX_BASE64, REVIDX_GZIP, REVIDX_FAIL]
105 revlog.REVIDX_KNOWN_FLAGS |= util.bitsfrom(flags)
108 flagutil.REVIDX_KNOWN_FLAGS |= util.bitsfrom(flags)
106 revlog.REVIDX_FLAGS_ORDER.extend(flags)
109 revlog.REVIDX_FLAGS_ORDER.extend(flags)
107
110
108 # Teach exchange to use changegroup 3
111 # Teach exchange to use changegroup 3
109 for k in exchange._bundlespeccontentopts.keys():
112 for k in exchange._bundlespeccontentopts.keys():
110 exchange._bundlespeccontentopts[k][b"cg.version"] = b"03"
113 exchange._bundlespeccontentopts[k][b"cg.version"] = b"03"
111
114
112 # Register flag processors for each extension
115 # Register flag processors for each extension
113 revlog.addflagprocessor(
116 revlog.addflagprocessor(
114 REVIDX_NOOP,
117 REVIDX_NOOP,
115 (
118 (
116 noopdonothing,
119 noopdonothing,
117 noopdonothing,
120 noopdonothing,
118 validatehash,
121 validatehash,
119 )
122 )
120 )
123 )
121 revlog.addflagprocessor(
124 revlog.addflagprocessor(
122 REVIDX_BASE64,
125 REVIDX_BASE64,
123 (
126 (
124 b64decode,
127 b64decode,
125 b64encode,
128 b64encode,
126 bypass,
129 bypass,
127 ),
130 ),
128 )
131 )
129 revlog.addflagprocessor(
132 revlog.addflagprocessor(
130 REVIDX_GZIP,
133 REVIDX_GZIP,
131 (
134 (
132 gzipdecompress,
135 gzipdecompress,
133 gzipcompress,
136 gzipcompress,
134 bypass
137 bypass
135 )
138 )
136 )
139 )
@@ -1,708 +1,711 b''
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 #
2 #
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # To use this with the test suite:
8 # To use this with the test suite:
9 #
9 #
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12
12
13 from __future__ import absolute_import
13 from __future__ import absolute_import
14
14
15 import stat
15 import stat
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial.node import (
18 from mercurial.node import (
19 bin,
19 bin,
20 hex,
20 hex,
21 nullid,
21 nullid,
22 nullrev,
22 nullrev,
23 )
23 )
24 from mercurial.thirdparty import (
24 from mercurial.thirdparty import (
25 attr,
25 attr,
26 )
26 )
27 from mercurial import (
27 from mercurial import (
28 ancestor,
28 ancestor,
29 bundlerepo,
29 bundlerepo,
30 error,
30 error,
31 extensions,
31 extensions,
32 localrepo,
32 localrepo,
33 mdiff,
33 mdiff,
34 pycompat,
34 pycompat,
35 repository,
35 repository,
36 revlog,
36 revlog,
37 store,
37 store,
38 verify,
38 verify,
39 )
39 )
40 from mercurial.utils import (
40 from mercurial.utils import (
41 cborutil,
41 cborutil,
42 interfaceutil,
42 interfaceutil,
43 storageutil,
43 storageutil,
44 )
44 )
45 from mercurial.revlogutils import (
46 flagutil,
47 )
45
48
46 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
49 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
47 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
50 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
48 # be specifying the version(s) of Mercurial they are tested with, or
51 # be specifying the version(s) of Mercurial they are tested with, or
49 # leave the attribute unspecified.
52 # leave the attribute unspecified.
50 testedwith = 'ships-with-hg-core'
53 testedwith = 'ships-with-hg-core'
51
54
52 REQUIREMENT = 'testonly-simplestore'
55 REQUIREMENT = 'testonly-simplestore'
53
56
54 def validatenode(node):
57 def validatenode(node):
55 if isinstance(node, int):
58 if isinstance(node, int):
56 raise ValueError('expected node; got int')
59 raise ValueError('expected node; got int')
57
60
58 if len(node) != 20:
61 if len(node) != 20:
59 raise ValueError('expected 20 byte node')
62 raise ValueError('expected 20 byte node')
60
63
61 def validaterev(rev):
64 def validaterev(rev):
62 if not isinstance(rev, int):
65 if not isinstance(rev, int):
63 raise ValueError('expected int')
66 raise ValueError('expected int')
64
67
65 class simplestoreerror(error.StorageError):
68 class simplestoreerror(error.StorageError):
66 pass
69 pass
67
70
68 @interfaceutil.implementer(repository.irevisiondelta)
71 @interfaceutil.implementer(repository.irevisiondelta)
69 @attr.s(slots=True)
72 @attr.s(slots=True)
70 class simplestorerevisiondelta(object):
73 class simplestorerevisiondelta(object):
71 node = attr.ib()
74 node = attr.ib()
72 p1node = attr.ib()
75 p1node = attr.ib()
73 p2node = attr.ib()
76 p2node = attr.ib()
74 basenode = attr.ib()
77 basenode = attr.ib()
75 flags = attr.ib()
78 flags = attr.ib()
76 baserevisionsize = attr.ib()
79 baserevisionsize = attr.ib()
77 revision = attr.ib()
80 revision = attr.ib()
78 delta = attr.ib()
81 delta = attr.ib()
79 linknode = attr.ib(default=None)
82 linknode = attr.ib(default=None)
80
83
81 @interfaceutil.implementer(repository.iverifyproblem)
84 @interfaceutil.implementer(repository.iverifyproblem)
82 @attr.s(frozen=True)
85 @attr.s(frozen=True)
83 class simplefilestoreproblem(object):
86 class simplefilestoreproblem(object):
84 warning = attr.ib(default=None)
87 warning = attr.ib(default=None)
85 error = attr.ib(default=None)
88 error = attr.ib(default=None)
86 node = attr.ib(default=None)
89 node = attr.ib(default=None)
87
90
88 @interfaceutil.implementer(repository.ifilestorage)
91 @interfaceutil.implementer(repository.ifilestorage)
89 class filestorage(object):
92 class filestorage(object):
90 """Implements storage for a tracked path.
93 """Implements storage for a tracked path.
91
94
92 Data is stored in the VFS in a directory corresponding to the tracked
95 Data is stored in the VFS in a directory corresponding to the tracked
93 path.
96 path.
94
97
95 Index data is stored in an ``index`` file using CBOR.
98 Index data is stored in an ``index`` file using CBOR.
96
99
97 Fulltext data is stored in files having names of the node.
100 Fulltext data is stored in files having names of the node.
98 """
101 """
99
102
100 def __init__(self, svfs, path):
103 def __init__(self, svfs, path):
101 self._svfs = svfs
104 self._svfs = svfs
102 self._path = path
105 self._path = path
103
106
104 self._storepath = b'/'.join([b'data', path])
107 self._storepath = b'/'.join([b'data', path])
105 self._indexpath = b'/'.join([self._storepath, b'index'])
108 self._indexpath = b'/'.join([self._storepath, b'index'])
106
109
107 indexdata = self._svfs.tryread(self._indexpath)
110 indexdata = self._svfs.tryread(self._indexpath)
108 if indexdata:
111 if indexdata:
109 indexdata = cborutil.decodeall(indexdata)
112 indexdata = cborutil.decodeall(indexdata)
110
113
111 self._indexdata = indexdata or []
114 self._indexdata = indexdata or []
112 self._indexbynode = {}
115 self._indexbynode = {}
113 self._indexbyrev = {}
116 self._indexbyrev = {}
114 self._index = []
117 self._index = []
115 self._refreshindex()
118 self._refreshindex()
116
119
117 def _refreshindex(self):
120 def _refreshindex(self):
118 self._indexbynode.clear()
121 self._indexbynode.clear()
119 self._indexbyrev.clear()
122 self._indexbyrev.clear()
120 self._index = []
123 self._index = []
121
124
122 for i, entry in enumerate(self._indexdata):
125 for i, entry in enumerate(self._indexdata):
123 self._indexbynode[entry[b'node']] = entry
126 self._indexbynode[entry[b'node']] = entry
124 self._indexbyrev[i] = entry
127 self._indexbyrev[i] = entry
125
128
126 self._indexbynode[nullid] = {
129 self._indexbynode[nullid] = {
127 b'node': nullid,
130 b'node': nullid,
128 b'p1': nullid,
131 b'p1': nullid,
129 b'p2': nullid,
132 b'p2': nullid,
130 b'linkrev': nullrev,
133 b'linkrev': nullrev,
131 b'flags': 0,
134 b'flags': 0,
132 }
135 }
133
136
134 self._indexbyrev[nullrev] = {
137 self._indexbyrev[nullrev] = {
135 b'node': nullid,
138 b'node': nullid,
136 b'p1': nullid,
139 b'p1': nullid,
137 b'p2': nullid,
140 b'p2': nullid,
138 b'linkrev': nullrev,
141 b'linkrev': nullrev,
139 b'flags': 0,
142 b'flags': 0,
140 }
143 }
141
144
142 for i, entry in enumerate(self._indexdata):
145 for i, entry in enumerate(self._indexdata):
143 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
146 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
144
147
145 # start, length, rawsize, chainbase, linkrev, p1, p2, node
148 # start, length, rawsize, chainbase, linkrev, p1, p2, node
146 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
149 self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
147 entry[b'node']))
150 entry[b'node']))
148
151
149 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
152 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
150
153
151 def __len__(self):
154 def __len__(self):
152 return len(self._indexdata)
155 return len(self._indexdata)
153
156
154 def __iter__(self):
157 def __iter__(self):
155 return iter(range(len(self)))
158 return iter(range(len(self)))
156
159
157 def revs(self, start=0, stop=None):
160 def revs(self, start=0, stop=None):
158 step = 1
161 step = 1
159 if stop is not None:
162 if stop is not None:
160 if start > stop:
163 if start > stop:
161 step = -1
164 step = -1
162
165
163 stop += step
166 stop += step
164 else:
167 else:
165 stop = len(self)
168 stop = len(self)
166
169
167 return range(start, stop, step)
170 return range(start, stop, step)
168
171
169 def parents(self, node):
172 def parents(self, node):
170 validatenode(node)
173 validatenode(node)
171
174
172 if node not in self._indexbynode:
175 if node not in self._indexbynode:
173 raise KeyError('unknown node')
176 raise KeyError('unknown node')
174
177
175 entry = self._indexbynode[node]
178 entry = self._indexbynode[node]
176
179
177 return entry[b'p1'], entry[b'p2']
180 return entry[b'p1'], entry[b'p2']
178
181
179 def parentrevs(self, rev):
182 def parentrevs(self, rev):
180 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
183 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
181 return self.rev(p1), self.rev(p2)
184 return self.rev(p1), self.rev(p2)
182
185
183 def rev(self, node):
186 def rev(self, node):
184 validatenode(node)
187 validatenode(node)
185
188
186 try:
189 try:
187 self._indexbynode[node]
190 self._indexbynode[node]
188 except KeyError:
191 except KeyError:
189 raise error.LookupError(node, self._indexpath, _('no node'))
192 raise error.LookupError(node, self._indexpath, _('no node'))
190
193
191 for rev, entry in self._indexbyrev.items():
194 for rev, entry in self._indexbyrev.items():
192 if entry[b'node'] == node:
195 if entry[b'node'] == node:
193 return rev
196 return rev
194
197
195 raise error.ProgrammingError('this should not occur')
198 raise error.ProgrammingError('this should not occur')
196
199
197 def node(self, rev):
200 def node(self, rev):
198 validaterev(rev)
201 validaterev(rev)
199
202
200 return self._indexbyrev[rev][b'node']
203 return self._indexbyrev[rev][b'node']
201
204
202 def hasnode(self, node):
205 def hasnode(self, node):
203 validatenode(node)
206 validatenode(node)
204 return node in self._indexbynode
207 return node in self._indexbynode
205
208
206 def censorrevision(self, tr, censornode, tombstone=b''):
209 def censorrevision(self, tr, censornode, tombstone=b''):
207 raise NotImplementedError('TODO')
210 raise NotImplementedError('TODO')
208
211
209 def lookup(self, node):
212 def lookup(self, node):
210 if isinstance(node, int):
213 if isinstance(node, int):
211 return self.node(node)
214 return self.node(node)
212
215
213 if len(node) == 20:
216 if len(node) == 20:
214 self.rev(node)
217 self.rev(node)
215 return node
218 return node
216
219
217 try:
220 try:
218 rev = int(node)
221 rev = int(node)
219 if '%d' % rev != node:
222 if '%d' % rev != node:
220 raise ValueError
223 raise ValueError
221
224
222 if rev < 0:
225 if rev < 0:
223 rev = len(self) + rev
226 rev = len(self) + rev
224 if rev < 0 or rev >= len(self):
227 if rev < 0 or rev >= len(self):
225 raise ValueError
228 raise ValueError
226
229
227 return self.node(rev)
230 return self.node(rev)
228 except (ValueError, OverflowError):
231 except (ValueError, OverflowError):
229 pass
232 pass
230
233
231 if len(node) == 40:
234 if len(node) == 40:
232 try:
235 try:
233 rawnode = bin(node)
236 rawnode = bin(node)
234 self.rev(rawnode)
237 self.rev(rawnode)
235 return rawnode
238 return rawnode
236 except TypeError:
239 except TypeError:
237 pass
240 pass
238
241
239 raise error.LookupError(node, self._path, _('invalid lookup input'))
242 raise error.LookupError(node, self._path, _('invalid lookup input'))
240
243
241 def linkrev(self, rev):
244 def linkrev(self, rev):
242 validaterev(rev)
245 validaterev(rev)
243
246
244 return self._indexbyrev[rev][b'linkrev']
247 return self._indexbyrev[rev][b'linkrev']
245
248
246 def _flags(self, rev):
249 def _flags(self, rev):
247 validaterev(rev)
250 validaterev(rev)
248
251
249 return self._indexbyrev[rev][b'flags']
252 return self._indexbyrev[rev][b'flags']
250
253
251 def _candelta(self, baserev, rev):
254 def _candelta(self, baserev, rev):
252 validaterev(baserev)
255 validaterev(baserev)
253 validaterev(rev)
256 validaterev(rev)
254
257
255 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
258 if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
256 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
259 or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
257 return False
260 return False
258
261
259 return True
262 return True
260
263
261 def _processflags(self, text, flags, operation, raw=False):
264 def _processflags(self, text, flags, operation, raw=False):
262 if flags == 0:
265 if flags == 0:
263 return text, True
266 return text, True
264
267
265 if flags & ~revlog.REVIDX_KNOWN_FLAGS:
268 if flags & ~flagutil.REVIDX_KNOWN_FLAGS:
266 raise simplestoreerror(_("incompatible revision flag '%#x'") %
269 raise simplestoreerror(_("incompatible revision flag '%#x'") %
267 (flags & ~revlog.REVIDX_KNOWN_FLAGS))
270 (flags & ~flagutil.REVIDX_KNOWN_FLAGS))
268
271
269 validatehash = True
272 validatehash = True
270 # Depending on the operation (read or write), the order might be
273 # Depending on the operation (read or write), the order might be
271 # reversed due to non-commutative transforms.
274 # reversed due to non-commutative transforms.
272 orderedflags = revlog.REVIDX_FLAGS_ORDER
275 orderedflags = revlog.REVIDX_FLAGS_ORDER
273 if operation == 'write':
276 if operation == 'write':
274 orderedflags = reversed(orderedflags)
277 orderedflags = reversed(orderedflags)
275
278
276 for flag in orderedflags:
279 for flag in orderedflags:
277 # If a flagprocessor has been registered for a known flag, apply the
280 # If a flagprocessor has been registered for a known flag, apply the
278 # related operation transform and update result tuple.
281 # related operation transform and update result tuple.
279 if flag & flags:
282 if flag & flags:
280 vhash = True
283 vhash = True
281
284
282 if flag not in revlog._flagprocessors:
285 if flag not in revlog._flagprocessors:
283 message = _("missing processor for flag '%#x'") % (flag)
286 message = _("missing processor for flag '%#x'") % (flag)
284 raise simplestoreerror(message)
287 raise simplestoreerror(message)
285
288
286 processor = revlog._flagprocessors[flag]
289 processor = revlog._flagprocessors[flag]
287 if processor is not None:
290 if processor is not None:
288 readtransform, writetransform, rawtransform = processor
291 readtransform, writetransform, rawtransform = processor
289
292
290 if raw:
293 if raw:
291 vhash = rawtransform(self, text)
294 vhash = rawtransform(self, text)
292 elif operation == 'read':
295 elif operation == 'read':
293 text, vhash = readtransform(self, text)
296 text, vhash = readtransform(self, text)
294 else: # write operation
297 else: # write operation
295 text, vhash = writetransform(self, text)
298 text, vhash = writetransform(self, text)
296 validatehash = validatehash and vhash
299 validatehash = validatehash and vhash
297
300
298 return text, validatehash
301 return text, validatehash
299
302
300 def checkhash(self, text, node, p1=None, p2=None, rev=None):
303 def checkhash(self, text, node, p1=None, p2=None, rev=None):
301 if p1 is None and p2 is None:
304 if p1 is None and p2 is None:
302 p1, p2 = self.parents(node)
305 p1, p2 = self.parents(node)
303 if node != storageutil.hashrevisionsha1(text, p1, p2):
306 if node != storageutil.hashrevisionsha1(text, p1, p2):
304 raise simplestoreerror(_("integrity check failed on %s") %
307 raise simplestoreerror(_("integrity check failed on %s") %
305 self._path)
308 self._path)
306
309
307 def revision(self, nodeorrev, raw=False):
310 def revision(self, nodeorrev, raw=False):
308 if isinstance(nodeorrev, int):
311 if isinstance(nodeorrev, int):
309 node = self.node(nodeorrev)
312 node = self.node(nodeorrev)
310 else:
313 else:
311 node = nodeorrev
314 node = nodeorrev
312 validatenode(node)
315 validatenode(node)
313
316
314 if node == nullid:
317 if node == nullid:
315 return b''
318 return b''
316
319
317 rev = self.rev(node)
320 rev = self.rev(node)
318 flags = self._flags(rev)
321 flags = self._flags(rev)
319
322
320 path = b'/'.join([self._storepath, hex(node)])
323 path = b'/'.join([self._storepath, hex(node)])
321 rawtext = self._svfs.read(path)
324 rawtext = self._svfs.read(path)
322
325
323 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
326 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
324 if validatehash:
327 if validatehash:
325 self.checkhash(text, node, rev=rev)
328 self.checkhash(text, node, rev=rev)
326
329
327 return text
330 return text
328
331
329 def rawdata(self, nodeorrev):
332 def rawdata(self, nodeorrev):
330 return self.revision(raw=True)
333 return self.revision(raw=True)
331
334
332 def read(self, node):
335 def read(self, node):
333 validatenode(node)
336 validatenode(node)
334
337
335 revision = self.revision(node)
338 revision = self.revision(node)
336
339
337 if not revision.startswith(b'\1\n'):
340 if not revision.startswith(b'\1\n'):
338 return revision
341 return revision
339
342
340 start = revision.index(b'\1\n', 2)
343 start = revision.index(b'\1\n', 2)
341 return revision[start + 2:]
344 return revision[start + 2:]
342
345
343 def renamed(self, node):
346 def renamed(self, node):
344 validatenode(node)
347 validatenode(node)
345
348
346 if self.parents(node)[0] != nullid:
349 if self.parents(node)[0] != nullid:
347 return False
350 return False
348
351
349 fulltext = self.revision(node)
352 fulltext = self.revision(node)
350 m = storageutil.parsemeta(fulltext)[0]
353 m = storageutil.parsemeta(fulltext)[0]
351
354
352 if m and 'copy' in m:
355 if m and 'copy' in m:
353 return m['copy'], bin(m['copyrev'])
356 return m['copy'], bin(m['copyrev'])
354
357
355 return False
358 return False
356
359
357 def cmp(self, node, text):
360 def cmp(self, node, text):
358 validatenode(node)
361 validatenode(node)
359
362
360 t = text
363 t = text
361
364
362 if text.startswith(b'\1\n'):
365 if text.startswith(b'\1\n'):
363 t = b'\1\n\1\n' + text
366 t = b'\1\n\1\n' + text
364
367
365 p1, p2 = self.parents(node)
368 p1, p2 = self.parents(node)
366
369
367 if storageutil.hashrevisionsha1(t, p1, p2) == node:
370 if storageutil.hashrevisionsha1(t, p1, p2) == node:
368 return False
371 return False
369
372
370 if self.iscensored(self.rev(node)):
373 if self.iscensored(self.rev(node)):
371 return text != b''
374 return text != b''
372
375
373 if self.renamed(node):
376 if self.renamed(node):
374 t2 = self.read(node)
377 t2 = self.read(node)
375 return t2 != text
378 return t2 != text
376
379
377 return True
380 return True
378
381
379 def size(self, rev):
382 def size(self, rev):
380 validaterev(rev)
383 validaterev(rev)
381
384
382 node = self._indexbyrev[rev][b'node']
385 node = self._indexbyrev[rev][b'node']
383
386
384 if self.renamed(node):
387 if self.renamed(node):
385 return len(self.read(node))
388 return len(self.read(node))
386
389
387 if self.iscensored(rev):
390 if self.iscensored(rev):
388 return 0
391 return 0
389
392
390 return len(self.revision(node))
393 return len(self.revision(node))
391
394
392 def iscensored(self, rev):
395 def iscensored(self, rev):
393 validaterev(rev)
396 validaterev(rev)
394
397
395 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
398 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
396
399
397 def commonancestorsheads(self, a, b):
400 def commonancestorsheads(self, a, b):
398 validatenode(a)
401 validatenode(a)
399 validatenode(b)
402 validatenode(b)
400
403
401 a = self.rev(a)
404 a = self.rev(a)
402 b = self.rev(b)
405 b = self.rev(b)
403
406
404 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
407 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
405 return pycompat.maplist(self.node, ancestors)
408 return pycompat.maplist(self.node, ancestors)
406
409
407 def descendants(self, revs):
410 def descendants(self, revs):
408 # This is a copy of revlog.descendants()
411 # This is a copy of revlog.descendants()
409 first = min(revs)
412 first = min(revs)
410 if first == nullrev:
413 if first == nullrev:
411 for i in self:
414 for i in self:
412 yield i
415 yield i
413 return
416 return
414
417
415 seen = set(revs)
418 seen = set(revs)
416 for i in self.revs(start=first + 1):
419 for i in self.revs(start=first + 1):
417 for x in self.parentrevs(i):
420 for x in self.parentrevs(i):
418 if x != nullrev and x in seen:
421 if x != nullrev and x in seen:
419 seen.add(i)
422 seen.add(i)
420 yield i
423 yield i
421 break
424 break
422
425
423 # Required by verify.
426 # Required by verify.
424 def files(self):
427 def files(self):
425 entries = self._svfs.listdir(self._storepath)
428 entries = self._svfs.listdir(self._storepath)
426
429
427 # Strip out undo.backup.* files created as part of transaction
430 # Strip out undo.backup.* files created as part of transaction
428 # recording.
431 # recording.
429 entries = [f for f in entries if not f.startswith('undo.backup.')]
432 entries = [f for f in entries if not f.startswith('undo.backup.')]
430
433
431 return [b'/'.join((self._storepath, f)) for f in entries]
434 return [b'/'.join((self._storepath, f)) for f in entries]
432
435
433 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
436 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
434 revisionscount=False, trackedsize=False,
437 revisionscount=False, trackedsize=False,
435 storedsize=False):
438 storedsize=False):
436 # TODO do a real implementation of this
439 # TODO do a real implementation of this
437 return {
440 return {
438 'exclusivefiles': [],
441 'exclusivefiles': [],
439 'sharedfiles': [],
442 'sharedfiles': [],
440 'revisionscount': len(self),
443 'revisionscount': len(self),
441 'trackedsize': 0,
444 'trackedsize': 0,
442 'storedsize': None,
445 'storedsize': None,
443 }
446 }
444
447
445 def verifyintegrity(self, state):
448 def verifyintegrity(self, state):
446 state['skipread'] = set()
449 state['skipread'] = set()
447 for rev in self:
450 for rev in self:
448 node = self.node(rev)
451 node = self.node(rev)
449 try:
452 try:
450 self.revision(node)
453 self.revision(node)
451 except Exception as e:
454 except Exception as e:
452 yield simplefilestoreproblem(
455 yield simplefilestoreproblem(
453 error='unpacking %s: %s' % (node, e),
456 error='unpacking %s: %s' % (node, e),
454 node=node)
457 node=node)
455 state['skipread'].add(node)
458 state['skipread'].add(node)
456
459
457 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
460 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
458 assumehaveparentrevisions=False,
461 assumehaveparentrevisions=False,
459 deltamode=repository.CG_DELTAMODE_STD):
462 deltamode=repository.CG_DELTAMODE_STD):
460 # TODO this will probably break on some ordering options.
463 # TODO this will probably break on some ordering options.
461 nodes = [n for n in nodes if n != nullid]
464 nodes = [n for n in nodes if n != nullid]
462 if not nodes:
465 if not nodes:
463 return
466 return
464 for delta in storageutil.emitrevisions(
467 for delta in storageutil.emitrevisions(
465 self, nodes, nodesorder, simplestorerevisiondelta,
468 self, nodes, nodesorder, simplestorerevisiondelta,
466 revisiondata=revisiondata,
469 revisiondata=revisiondata,
467 assumehaveparentrevisions=assumehaveparentrevisions,
470 assumehaveparentrevisions=assumehaveparentrevisions,
468 deltamode=deltamode):
471 deltamode=deltamode):
469 yield delta
472 yield delta
470
473
471 def add(self, text, meta, transaction, linkrev, p1, p2):
474 def add(self, text, meta, transaction, linkrev, p1, p2):
472 if meta or text.startswith(b'\1\n'):
475 if meta or text.startswith(b'\1\n'):
473 text = storageutil.packmeta(meta, text)
476 text = storageutil.packmeta(meta, text)
474
477
475 return self.addrevision(text, transaction, linkrev, p1, p2)
478 return self.addrevision(text, transaction, linkrev, p1, p2)
476
479
477 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
480 def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
478 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
481 flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
479 validatenode(p1)
482 validatenode(p1)
480 validatenode(p2)
483 validatenode(p2)
481
484
482 if flags:
485 if flags:
483 node = node or storageutil.hashrevisionsha1(text, p1, p2)
486 node = node or storageutil.hashrevisionsha1(text, p1, p2)
484
487
485 rawtext, validatehash = self._processflags(text, flags, 'write')
488 rawtext, validatehash = self._processflags(text, flags, 'write')
486
489
487 node = node or storageutil.hashrevisionsha1(text, p1, p2)
490 node = node or storageutil.hashrevisionsha1(text, p1, p2)
488
491
489 if node in self._indexbynode:
492 if node in self._indexbynode:
490 return node
493 return node
491
494
492 if validatehash:
495 if validatehash:
493 self.checkhash(rawtext, node, p1=p1, p2=p2)
496 self.checkhash(rawtext, node, p1=p1, p2=p2)
494
497
495 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
498 return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
496 flags)
499 flags)
497
500
498 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
501 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
499 transaction.addbackup(self._indexpath)
502 transaction.addbackup(self._indexpath)
500
503
501 path = b'/'.join([self._storepath, hex(node)])
504 path = b'/'.join([self._storepath, hex(node)])
502
505
503 self._svfs.write(path, rawtext)
506 self._svfs.write(path, rawtext)
504
507
505 self._indexdata.append({
508 self._indexdata.append({
506 b'node': node,
509 b'node': node,
507 b'p1': p1,
510 b'p1': p1,
508 b'p2': p2,
511 b'p2': p2,
509 b'linkrev': link,
512 b'linkrev': link,
510 b'flags': flags,
513 b'flags': flags,
511 })
514 })
512
515
513 self._reflectindexupdate()
516 self._reflectindexupdate()
514
517
515 return node
518 return node
516
519
517 def _reflectindexupdate(self):
520 def _reflectindexupdate(self):
518 self._refreshindex()
521 self._refreshindex()
519 self._svfs.write(self._indexpath,
522 self._svfs.write(self._indexpath,
520 ''.join(cborutil.streamencode(self._indexdata)))
523 ''.join(cborutil.streamencode(self._indexdata)))
521
524
522 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
525 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
523 maybemissingparents=False):
526 maybemissingparents=False):
524 if maybemissingparents:
527 if maybemissingparents:
525 raise error.Abort(_('simple store does not support missing parents '
528 raise error.Abort(_('simple store does not support missing parents '
526 'write mode'))
529 'write mode'))
527
530
528 nodes = []
531 nodes = []
529
532
530 transaction.addbackup(self._indexpath)
533 transaction.addbackup(self._indexpath)
531
534
532 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
535 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
533 linkrev = linkmapper(linknode)
536 linkrev = linkmapper(linknode)
534 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
537 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
535
538
536 nodes.append(node)
539 nodes.append(node)
537
540
538 if node in self._indexbynode:
541 if node in self._indexbynode:
539 continue
542 continue
540
543
541 # Need to resolve the fulltext from the delta base.
544 # Need to resolve the fulltext from the delta base.
542 if deltabase == nullid:
545 if deltabase == nullid:
543 text = mdiff.patch(b'', delta)
546 text = mdiff.patch(b'', delta)
544 else:
547 else:
545 text = mdiff.patch(self.revision(deltabase), delta)
548 text = mdiff.patch(self.revision(deltabase), delta)
546
549
547 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
550 self._addrawrevision(node, text, transaction, linkrev, p1, p2,
548 flags)
551 flags)
549
552
550 if addrevisioncb:
553 if addrevisioncb:
551 addrevisioncb(self, node)
554 addrevisioncb(self, node)
552 return nodes
555 return nodes
553
556
554 def _headrevs(self):
557 def _headrevs(self):
555 # Assume all revisions are heads by default.
558 # Assume all revisions are heads by default.
556 revishead = {rev: True for rev in self._indexbyrev}
559 revishead = {rev: True for rev in self._indexbyrev}
557
560
558 for rev, entry in self._indexbyrev.items():
561 for rev, entry in self._indexbyrev.items():
559 # Unset head flag for all seen parents.
562 # Unset head flag for all seen parents.
560 revishead[self.rev(entry[b'p1'])] = False
563 revishead[self.rev(entry[b'p1'])] = False
561 revishead[self.rev(entry[b'p2'])] = False
564 revishead[self.rev(entry[b'p2'])] = False
562
565
563 return [rev for rev, ishead in sorted(revishead.items())
566 return [rev for rev, ishead in sorted(revishead.items())
564 if ishead]
567 if ishead]
565
568
566 def heads(self, start=None, stop=None):
569 def heads(self, start=None, stop=None):
567 # This is copied from revlog.py.
570 # This is copied from revlog.py.
568 if start is None and stop is None:
571 if start is None and stop is None:
569 if not len(self):
572 if not len(self):
570 return [nullid]
573 return [nullid]
571 return [self.node(r) for r in self._headrevs()]
574 return [self.node(r) for r in self._headrevs()]
572
575
573 if start is None:
576 if start is None:
574 start = nullid
577 start = nullid
575 if stop is None:
578 if stop is None:
576 stop = []
579 stop = []
577 stoprevs = set([self.rev(n) for n in stop])
580 stoprevs = set([self.rev(n) for n in stop])
578 startrev = self.rev(start)
581 startrev = self.rev(start)
579 reachable = {startrev}
582 reachable = {startrev}
580 heads = {startrev}
583 heads = {startrev}
581
584
582 parentrevs = self.parentrevs
585 parentrevs = self.parentrevs
583 for r in self.revs(start=startrev + 1):
586 for r in self.revs(start=startrev + 1):
584 for p in parentrevs(r):
587 for p in parentrevs(r):
585 if p in reachable:
588 if p in reachable:
586 if r not in stoprevs:
589 if r not in stoprevs:
587 reachable.add(r)
590 reachable.add(r)
588 heads.add(r)
591 heads.add(r)
589 if p in heads and p not in stoprevs:
592 if p in heads and p not in stoprevs:
590 heads.remove(p)
593 heads.remove(p)
591
594
592 return [self.node(r) for r in heads]
595 return [self.node(r) for r in heads]
593
596
594 def children(self, node):
597 def children(self, node):
595 validatenode(node)
598 validatenode(node)
596
599
597 # This is a copy of revlog.children().
600 # This is a copy of revlog.children().
598 c = []
601 c = []
599 p = self.rev(node)
602 p = self.rev(node)
600 for r in self.revs(start=p + 1):
603 for r in self.revs(start=p + 1):
601 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
604 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
602 if prevs:
605 if prevs:
603 for pr in prevs:
606 for pr in prevs:
604 if pr == p:
607 if pr == p:
605 c.append(self.node(r))
608 c.append(self.node(r))
606 elif p == nullrev:
609 elif p == nullrev:
607 c.append(self.node(r))
610 c.append(self.node(r))
608 return c
611 return c
609
612
610 def getstrippoint(self, minlink):
613 def getstrippoint(self, minlink):
611 return storageutil.resolvestripinfo(
614 return storageutil.resolvestripinfo(
612 minlink, len(self) - 1, self._headrevs(), self.linkrev,
615 minlink, len(self) - 1, self._headrevs(), self.linkrev,
613 self.parentrevs)
616 self.parentrevs)
614
617
615 def strip(self, minlink, transaction):
618 def strip(self, minlink, transaction):
616 if not len(self):
619 if not len(self):
617 return
620 return
618
621
619 rev, _ignored = self.getstrippoint(minlink)
622 rev, _ignored = self.getstrippoint(minlink)
620 if rev == len(self):
623 if rev == len(self):
621 return
624 return
622
625
623 # Purge index data starting at the requested revision.
626 # Purge index data starting at the requested revision.
624 self._indexdata[rev:] = []
627 self._indexdata[rev:] = []
625 self._reflectindexupdate()
628 self._reflectindexupdate()
626
629
def issimplestorefile(f, kind, st):
    """Return True if path ``f`` should be treated as a simple store file.

    ``kind`` is a stat file-type constant and ``st`` a stat result, as
    supplied by the store walk machinery.
    """
    # Only regular files can belong to the store.
    if kind != stat.S_IFREG:
        return False

    # Revlog files keep their regular handling.
    if store.isrevlog(f, kind, st):
        return False

    # Transaction undo files are bookkeeping, not store content; anything
    # else is assumed to belong to the simple store.
    return not f.startswith('undo.')
640
643
class simplestore(store.encodedstore):
    """encodedstore variant that also reports non-revlog store files."""

    def datafiles(self):
        # First, everything the encoded store already knows about.
        for entry in super(simplestore, self).datafiles():
            yield entry

        # Then walk the data directory for files the simple store owns.
        extra = self._walk('data', True, filefilter=issimplestorefile)
        for unencoded, encoded, size in extra:
            # Report the decoded name when possible; None when the name
            # cannot be decoded.
            try:
                unencoded = store.decodefilename(unencoded)
            except KeyError:
                unencoded = None

            yield unencoded, encoded, size
656
659
def reposetup(ui, repo):
    """Swap in simple store file storage for local repositories."""
    # Only local repositories have a store to replace.
    if not repo.local():
        return

    # Bundle repositories layer on top of revlogs directly; the simple
    # store cannot stand in for them.
    if isinstance(repo, bundlerepo.bundlerepository):
        raise error.Abort(_('cannot use simple store with bundlerepo'))

    class simplestorerepo(repo.__class__):
        def file(self, f):
            return filestorage(self.svfs, f)

    repo.__class__ = simplestorerepo
669
672
def featuresetup(ui, supported):
    """Declare the simple store requirement as supported by localrepo."""
    supported.add(REQUIREMENT)
672
675
def newreporequirements(orig, ui, createopts):
    """Modify default requirements for new repos to use the simple store.

    Wraps localrepo.newreporequirements(); adds this extension's
    requirement on top of whatever the wrapped function produced.
    """
    reqs = orig(ui, createopts)

    # 'fncache' and 'dotencode' only influence creation of the store
    # object, and we supply our own store, so they could be removed here.
    # TODO drop them once we feel like taking the test hit.

    reqs.add(REQUIREMENT)
    return reqs
688
691
def makestore(orig, requirements, path, vfstype):
    """Create the repo store, substituting simplestore when required.

    Wraps localrepo.makestore(); falls through to the original factory
    unless this extension's requirement is present.
    """
    if REQUIREMENT in requirements:
        return simplestore(path, vfstype)

    return orig(requirements, path, vfstype)
694
697
def verifierinit(orig, self, *args, **kwargs):
    """Run the wrapped verifier constructor, then relax store warnings.

    Files in the simple store deliberately do not align with what the
    repository advertises, so the orphan store file warnings would only
    be noise.
    """
    orig(self, *args, **kwargs)
    self.warnorphanstorefiles = False
701
704
def extsetup(ui):
    """Install the wrappers that route new repositories to the simple store."""
    localrepo.featuresetupfuncs.add(featuresetup)

    wrap = extensions.wrapfunction
    wrap(localrepo, 'newreporequirements', newreporequirements)
    wrap(localrepo, 'makestore', makestore)
    wrap(verify.verifier, '__init__', verifierinit)
General Comments 0
You need to be logged in to leave comments. Login now