revlog: introduce v2 format...
Raphaël Gomès
r47438:91348577 default
@@ -1,292 +1,343 @@
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid, nullrev
14 14 from .. import (
15 15 pycompat,
16 16 util,
17 17 )
18 18
19 19 from ..revlogutils import nodemap as nodemaputil
20 20
21 21 stringio = pycompat.bytesio
22 22
23 23
24 24 _pack = struct.pack
25 25 _unpack = struct.unpack
26 26 _compress = zlib.compress
27 27 _decompress = zlib.decompress
28 28
29 29 # Some code below makes tuples directly because it's more convenient. However,
30 30 # code outside this module should always use dirstatetuple.
31 31 def dirstatetuple(*x):
32 32 # x is a tuple
33 33 return x
34 34
35 35
36 36 def gettype(q):
37 37 return int(q & 0xFFFF)
38 38
39 39
40 40 def offset_type(offset, type):
41 41 return int(int(offset) << 16 | type)
42 42
43 43
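The two helpers above pack a byte offset and a 16-bit flag field into the single integer stored as the first field of every index entry. A minimal round-trip sketch, using only the helpers defined in this file:

    # 48-bit offset in the high bits, 16-bit flags in the low bits.
    v = offset_type(1024, 0x1)   # offset 1024, flag bit 0x1
    assert gettype(v) == 0x1     # the low 16 bits are the flags
    assert v >> 16 == 1024       # shifting recovers the offset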
44 44 class BaseIndexObject(object):
45 45 # Format of an index entry according to Python's `struct` language
46 46 index_format = b">Qiiiiii20s12x"
47 47 # Size of a C unsigned long long int, platform independent
48 48 big_int_size = struct.calcsize(b'>Q')
49 49 # Size of a C long int, platform independent
50 50 int_size = struct.calcsize(b'>i')
51 51 # Size of the entire index format
52 52 index_size = struct.calcsize(index_format)
53 53 # An empty index entry, used as a default value to be overridden, or nullrev
54 54 null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
55 55
56 56 @property
57 57 def nodemap(self):
58 58 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
59 59 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
60 60 return self._nodemap
61 61
62 62 @util.propertycache
63 63 def _nodemap(self):
64 64 nodemap = nodemaputil.NodeMap({nullid: nullrev})
65 65 for r in range(0, len(self)):
66 66 n = self[r][7]
67 67 nodemap[n] = r
68 68 return nodemap
69 69
70 70 def has_node(self, node):
71 71 """return True if the node exists in the index"""
72 72 return node in self._nodemap
73 73
74 74 def rev(self, node):
75 75 """return a revision for a node
76 76
77 77 If the node is unknown, raise a RevlogError"""
78 78 return self._nodemap[node]
79 79
80 80 def get_rev(self, node):
81 81 """return a revision for a node
82 82
83 83 If the node is unknown, return None"""
84 84 return self._nodemap.get(node)
85 85
86 86 def _stripnodes(self, start):
87 87 if '_nodemap' in vars(self):
88 88 for r in range(start, len(self)):
89 89 n = self[r][7]
90 90 del self._nodemap[n]
91 91
92 92 def clearcaches(self):
93 93 self.__dict__.pop('_nodemap', None)
94 94
95 95 def __len__(self):
96 96 return self._lgt + len(self._extra)
97 97
98 98 def append(self, tup):
99 99 if '_nodemap' in vars(self):
100 100 self._nodemap[tup[7]] = len(self)
101 101 data = _pack(self.index_format, *tup)
102 102 self._extra.append(data)
103 103
104 104 def _check_index(self, i):
105 105 if not isinstance(i, int):
106 106 raise TypeError(b"expecting int indexes")
107 107 if i < 0 or i >= len(self):
108 108 raise IndexError
109 109
110 110 def __getitem__(self, i):
111 111 if i == -1:
112 112 return self.null_item
113 113 self._check_index(i)
114 114 if i >= self._lgt:
115 115 data = self._extra[i - self._lgt]
116 116 else:
117 117 index = self._calculate_index(i)
118 118 data = self._data[index : index + self.index_size]
119 119 r = _unpack(self.index_format, data)
120 120 if self._lgt and i == 0:
121 121 r = (offset_type(0, gettype(r[0])),) + r[1:]
122 122 return r
123 123
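A note on the `i == 0` special case above: for an on-disk revlog the first 4 bytes of the file double as the version header (see `packentry` in `revlog.py`), so the stored first field of entry 0 is partly garbage. `__getitem__` therefore keeps only the flag bits and forces the offset back to zero. A sketch with an illustrative garbage value:

    raw = (0xDEADBEEF << 16) | 0x0001  # bogus offset bits plus one real flag
    fixed = offset_type(0, gettype(raw))
    assert fixed == 0x0001             # offset zeroed, flag bit preserved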
124 124
125 125 class IndexObject(BaseIndexObject):
126 126 def __init__(self, data):
127 127 assert len(data) % self.index_size == 0
128 128 self._data = data
129 129 self._lgt = len(data) // self.index_size
130 130 self._extra = []
131 131
132 132 def _calculate_index(self, i):
133 133 return i * self.index_size
134 134
135 135 def __delitem__(self, i):
136 136 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
137 137 raise ValueError(b"deleting slices only supports a:-1 with step 1")
138 138 i = i.start
139 139 self._check_index(i)
140 140 self._stripnodes(i)
141 141 if i < self._lgt:
142 142 self._data = self._data[: i * self.index_size]
143 143 self._lgt = i
144 144 self._extra = []
145 145 else:
146 146 self._extra = self._extra[: i - self._lgt]
147 147
148 148
149 149 class PersistentNodeMapIndexObject(IndexObject):
150 150 """a debug-oriented class to test persistent nodemap
151 151
152 152 We need a simple python object to test API and higher level behavior. See
153 153 the Rust implementation for more serious usage. This should be used only
154 154 through the dedicated `devel.persistent-nodemap` config.
155 155 """
156 156
157 157 def nodemap_data_all(self):
158 158 """Return bytes containing a full serialization of a nodemap
159 159
160 160 The nodemap should be valid for the full set of revisions in the
161 161 index."""
162 162 return nodemaputil.persistent_data(self)
163 163
164 164 def nodemap_data_incremental(self):
165 165 """Return bytes containing an incremental update to persistent nodemap
166 166
167 167 This contains the data for an append-only update of the data provided
168 168 in the last call to `update_nodemap_data`.
169 169 """
170 170 if self._nm_root is None:
171 171 return None
172 172 docket = self._nm_docket
173 173 changed, data = nodemaputil.update_persistent_data(
174 174 self, self._nm_root, self._nm_max_idx, self._nm_docket.tip_rev
175 175 )
176 176
177 177 self._nm_root = self._nm_max_idx = self._nm_docket = None
178 178 return docket, changed, data
179 179
180 180 def update_nodemap_data(self, docket, nm_data):
181 181 """provide full block of persisted binary data for a nodemap
182 182
183 183 The data are expected to come from disk. See `nodemap_data_all` for a
184 184 producer of such data."""
185 185 if nm_data is not None:
186 186 self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
187 187 if self._nm_root:
188 188 self._nm_docket = docket
189 189 else:
190 190 self._nm_root = self._nm_max_idx = self._nm_docket = None
191 191
192 192
193 193 class InlinedIndexObject(BaseIndexObject):
194 194 def __init__(self, data, inline=0):
195 195 self._data = data
196 196 self._lgt = self._inline_scan(None)
197 197 self._inline_scan(self._lgt)
198 198 self._extra = []
199 199
200 200 def _inline_scan(self, lgt):
201 201 off = 0
202 202 if lgt is not None:
203 203 self._offsets = [0] * lgt
204 204 count = 0
205 205 while off <= len(self._data) - self.index_size:
206 206 start = off + self.big_int_size
207 207 (s,) = struct.unpack(
208 208 b'>i',
209 209 self._data[start : start + self.int_size],
210 210 )
211 211 if lgt is not None:
212 212 self._offsets[count] = off
213 213 count += 1
214 214 off += self.index_size + s
215 215 if off != len(self._data):
216 216 raise ValueError(b"corrupted data")
217 217 return count
218 218
219 219 def __delitem__(self, i):
220 220 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
221 221 raise ValueError(b"deleting slices only supports a:-1 with step 1")
222 222 i = i.start
223 223 self._check_index(i)
224 224 self._stripnodes(i)
225 225 if i < self._lgt:
226 226 self._offsets = self._offsets[:i]
227 227 self._lgt = i
228 228 self._extra = []
229 229 else:
230 230 self._extra = self._extra[: i - self._lgt]
231 231
232 232 def _calculate_index(self, i):
233 233 return self._offsets[i]
234 234
235 235
236 def parse_index2(data, inline):
236 def parse_index2(data, inline, revlogv2=False):
237 237 if not inline:
238 return IndexObject(data), None
239 return InlinedIndexObject(data, inline), (0, data)
238 cls = IndexObject2 if revlogv2 else IndexObject
239 return cls(data), None
240 cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
241 return cls(data, inline), (0, data)
242
243
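With this change `parse_index2` dispatches on the new `revlogv2` argument. A minimal usage sketch (the empty payload is illustrative; real callers pass the raw index file contents):

    index, cache = parse_index2(b'', False, revlogv2=True)
    assert isinstance(index, IndexObject2) and cache is None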
244 class Index2Mixin(object):
245 # 6 bytes: offset
246 # 2 bytes: flags
247 # 4 bytes: compressed length
248 # 4 bytes: uncompressed length
249 # 4 bytes: base rev
250 # 4 bytes: link rev
251 # 4 bytes: parent 1 rev
252 # 4 bytes: parent 2 rev
253 # 32 bytes: nodeid
254 # 8 bytes: sidedata offset
255 # 4 bytes: sidedata compressed length
256 # 20 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
257 index_format = b">Qiiiiii20s12xQi20x"
258 index_size = struct.calcsize(index_format)
259 assert index_size == 96, index_size
260 null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
261
262
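The byte layout documented on `Index2Mixin` can be checked directly against the struct format string; a short sketch (assuming, as in Mercurial's pure node module, that nullid is 20 zero bytes):

    import struct

    fmt = b">Qiiiiii20s12xQi20x"       # Index2Mixin.index_format
    assert struct.calcsize(fmt) == 96  # matches the assert in the class body

    # Packing the null entry yields one full-size, all-defaults record.
    packed = struct.pack(fmt, 0, 0, 0, -1, -1, -1, -1, b"\0" * 20, 0, 0)
    assert len(packed) == 96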
263 class IndexObject2(Index2Mixin, IndexObject):
264 pass
265
266
267 class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
268 def _inline_scan(self, lgt):
269 sidedata_length_pos = 72
270 off = 0
271 if lgt is not None:
272 self._offsets = [0] * lgt
273 count = 0
274 while off <= len(self._data) - self.index_size:
275 start = off + self.big_int_size
276 (data_size,) = struct.unpack(
277 b'>i',
278 self._data[start : start + self.int_size],
279 )
280 start = off + sidedata_length_pos
281 (side_data_size,) = struct.unpack(
282 b'>i', self._data[start : start + self.int_size]
283 )
284 if lgt is not None:
285 self._offsets[count] = off
286 count += 1
287 off += self.index_size + data_size + side_data_size
288 if off != len(self._data):
289 raise ValueError(b"corrupted data")
290 return count
240 291
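For inline v2 revlogs each index entry is followed by its revision data and its sidedata, so `_inline_scan` must skip both to find the next entry. A round-trip sketch with illustrative sizes and payload bytes:

    import struct

    fmt = b">Qiiiiii20s12xQi20x"
    data_size, sidedata_size = 5, 3
    entry = struct.pack(
        fmt, 0, data_size, data_size, -1, -1, -1, -1, b"\0" * 20, 0, sidedata_size
    )
    blob = entry + b"D" * data_size + b"S" * sidedata_size
    index = InlinedIndexObject2(blob, inline=1)
    assert len(index) == 1  # the scan stepped over both data and sidedata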
241 292
242 293 def parse_index_devel_nodemap(data, inline):
243 294 """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
244 295 return PersistentNodeMapIndexObject(data), None
245 296
246 297
247 298 def parse_dirstate(dmap, copymap, st):
248 299 parents = [st[:20], st[20:40]]
249 300 # dereference fields so they will be local in loop
250 301 format = b">cllll"
251 302 e_size = struct.calcsize(format)
252 303 pos1 = 40
253 304 l = len(st)
254 305
255 306 # the inner loop
256 307 while pos1 < l:
257 308 pos2 = pos1 + e_size
258 309 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
259 310 pos1 = pos2 + e[4]
260 311 f = st[pos2:pos1]
261 312 if b'\0' in f:
262 313 f, c = f.split(b'\0')
263 314 copymap[f] = c
264 315 dmap[f] = e[:4]
265 316 return parents
266 317
267 318
268 319 def pack_dirstate(dmap, copymap, pl, now):
269 320 now = int(now)
270 321 cs = stringio()
271 322 write = cs.write
272 323 write(b"".join(pl))
273 324 for f, e in pycompat.iteritems(dmap):
274 325 if e[0] == b'n' and e[3] == now:
275 326 # The file was last modified "simultaneously" with the current
276 327 # write to dirstate (i.e. within the same second for file-
277 328 # systems with a granularity of 1 sec). This commonly happens
278 329 # for at least a couple of files on 'update'.
279 330 # The user could change the file without changing its size
280 331 # within the same second. Invalidate the file's mtime in
281 332 # dirstate, forcing future 'status' calls to compare the
282 333 # contents of the file if the size is the same. This prevents
283 334 # mistakenly treating such files as clean.
284 335 e = dirstatetuple(e[0], e[1], e[2], -1)
285 336 dmap[f] = e
286 337
287 338 if f in copymap:
288 339 f = b"%s\0%s" % (f, copymap[f])
289 340 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
290 341 write(e)
291 342 write(f)
292 343 return cs.getvalue()
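The dirstate record format used by `parse_dirstate` and `pack_dirstate` above is a fixed `b">cllll"` header (state, mode, size, mtime, filename length) followed by the filename, with a NUL-separated copy source appended when one exists. A round-trip sketch with an illustrative entry:

    import struct

    f, e = b"foo.txt", (b'n', 0o644, 12, 1500000000)
    record = struct.pack(b">cllll", e[0], e[1], e[2], e[3], len(f)) + f
    dmap, copymap = {}, {}
    parents = parse_dirstate(dmap, copymap, b"\0" * 40 + record)
    assert dmap[f] == e and not copymap  # entry recovered, no copy info
    assert parents == [b"\0" * 20] * 2   # the two 20-byte parent slots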
@@ -1,82 +1,82 @@
1 1 # requirements.py - objects and functions related to repository requirements
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 GENERALDELTA_REQUIREMENT = b'generaldelta'
11 11 DOTENCODE_REQUIREMENT = b'dotencode'
12 12 STORE_REQUIREMENT = b'store'
13 13 FNCACHE_REQUIREMENT = b'fncache'
14 14
15 15 # When narrowing is finalized and no longer subject to format changes,
16 16 # we should move this to just "narrow" or similar.
17 17 NARROW_REQUIREMENT = b'narrowhg-experimental'
18 18
19 19 # Enables sparse working directory usage
20 20 SPARSE_REQUIREMENT = b'exp-sparse'
21 21
22 22 # Enables the internal phase which is used to hide changesets instead
23 23 # of stripping them
24 24 INTERNAL_PHASE_REQUIREMENT = b'internal-phase'
25 25
26 26 # Stores manifest in Tree structure
27 27 TREEMANIFEST_REQUIREMENT = b'treemanifest'
28 28
29 29 REVLOGV1_REQUIREMENT = b'revlogv1'
30 30
31 31 # Increment the sub-version when the revlog v2 format changes to lock out old
32 32 # clients.
33 REVLOGV2_REQUIREMENT = b'exp-revlogv2.1'
33 REVLOGV2_REQUIREMENT = b'exp-revlogv2.2'
34 34
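Bumping the sub-version is what locks out old clients: a client refuses to open any repository whose requirements file contains an entry it does not recognize. A sketch of that gate (the support set and helper below are hypothetical, not Mercurial's actual API):

    # An old client that only knows the previous experimental marker:
    SUPPORTED = {b'revlogv1', b'generaldelta', b'exp-revlogv2.1'}

    def can_open(repo_requirements):
        return not (set(repo_requirements) - SUPPORTED)

    assert not can_open({b'exp-revlogv2.2'})  # new-format repos are rejected
    assert can_open({b'exp-revlogv2.1'})      # old-format repos still work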
35 35 # A repository with the sparserevlog feature will have delta chains that
36 36 # can spread over a larger span. Sparse reading cuts these large spans into
37 37 # pieces, so that each piece isn't too big.
38 38 # Without the sparserevlog capability, reading from the repository could use
39 39 # huge amounts of memory, because the whole span would be read at once,
40 40 # including all the intermediate revisions that aren't pertinent for the chain.
41 41 # This is why once a repository has enabled sparse-read, it becomes required.
42 42 SPARSEREVLOG_REQUIREMENT = b'sparserevlog'
43 43
44 44 # A repository with the sidedataflag requirement allows storing extra
45 45 # information for revisions without altering their original hashes.
46 46 SIDEDATA_REQUIREMENT = b'exp-sidedata-flag'
47 47
48 48 # A repository with the copies-sidedata-changeset requirement will store
49 49 # copies related information in changeset's sidedata.
50 50 COPIESSDC_REQUIREMENT = b'exp-copies-sidedata-changeset'
51 51
52 52 # The repository uses a persistent nodemap for the changelog and the manifest.
53 53 NODEMAP_REQUIREMENT = b'persistent-nodemap'
54 54
55 55 # Denotes that the current repository is a share
56 56 SHARED_REQUIREMENT = b'shared'
57 57
58 58 # Denotes that the current repository is a share and the shared source path is
59 59 # relative to the current repository root path
60 60 RELATIVE_SHARED_REQUIREMENT = b'relshared'
61 61
62 62 # A repository with share implemented safely. The repository has different
63 63 # store and working copy requirements, i.e. both `.hg/requires` and
64 64 # `.hg/store/requires` are present.
65 65 SHARESAFE_REQUIREMENT = b'share-safe'
66 66
67 67 # List of requirements which are working directory specific
68 68 # These requirements cannot be shared between repositories if they
69 69 # share the same store
70 70 # * sparse is a working-directory-specific functionality and hence a working-
71 71 # directory-specific requirement
72 72 # * SHARED_REQUIREMENT and RELATIVE_SHARED_REQUIREMENT are requirements which
73 73 # represent that the current working copy/repository shares the store of
74 74 # another repo. Hence both of them should be stored in the working copy
75 75 # * SHARESAFE_REQUIREMENT needs to be stored in the working dir to mark that
76 76 # the rest of the requirements are stored in the store's requires
77 77 WORKING_DIR_REQUIREMENTS = {
78 78 SPARSE_REQUIREMENT,
79 79 SHARED_REQUIREMENT,
80 80 RELATIVE_SHARED_REQUIREMENT,
81 81 SHARESAFE_REQUIREMENT,
82 82 }
@@ -1,3110 +1,3138 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 from .pure import parsers as pureparsers
86 87
87 88 # blanked usage of all the name to prevent pyflakes constraints
88 89 # We need these name available in the module for extensions.
89 90 REVLOGV0
90 91 REVLOGV1
91 92 REVLOGV2
92 93 FLAG_INLINE_DATA
93 94 FLAG_GENERALDELTA
94 95 REVLOG_DEFAULT_FLAGS
95 96 REVLOG_DEFAULT_FORMAT
96 97 REVLOG_DEFAULT_VERSION
97 98 REVLOGV1_FLAGS
98 99 REVLOGV2_FLAGS
99 100 REVIDX_ISCENSORED
100 101 REVIDX_ELLIPSIS
101 102 REVIDX_SIDEDATA
102 103 REVIDX_HASCOPIESINFO
103 104 REVIDX_EXTSTORED
104 105 REVIDX_DEFAULT_FLAGS
105 106 REVIDX_FLAGS_ORDER
106 107 REVIDX_RAWTEXT_CHANGING_FLAGS
107 108
108 109 parsers = policy.importmod('parsers')
109 110 rustancestor = policy.importrust('ancestor')
110 111 rustdagop = policy.importrust('dagop')
111 112 rustrevlog = policy.importrust('revlog')
112 113
113 114 # Aliased for performance.
114 115 _zlibdecompress = zlib.decompress
115 116
116 117 # max size of revlog with inline data
117 118 _maxinline = 131072
118 119 _chunksize = 1048576
119 120
120 121 # Flag processors for REVIDX_ELLIPSIS.
121 122 def ellipsisreadprocessor(rl, text):
122 123 return text, False, {}
123 124
124 125
125 126 def ellipsiswriteprocessor(rl, text, sidedata):
126 127 return text, False
127 128
128 129
129 130 def ellipsisrawprocessor(rl, text):
130 131 return False
131 132
132 133
133 134 ellipsisprocessor = (
134 135 ellipsisreadprocessor,
135 136 ellipsiswriteprocessor,
136 137 ellipsisrawprocessor,
137 138 )
138 139
139 140
140 141 def getoffset(q):
141 142 return int(q >> 16)
142 143
143 144
144 145 def gettype(q):
145 146 return int(q & 0xFFFF)
146 147
147 148
148 149 def offset_type(offset, type):
149 150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 151 raise ValueError(b'unknown revlog index flags')
151 152 return int(int(offset) << 16 | type)
152 153
153 154
154 155 def _verify_revision(rl, skipflags, state, node):
155 156 """Verify the integrity of the given revlog ``node`` while providing a hook
156 157 point for extensions to influence the operation."""
157 158 if skipflags:
158 159 state[b'skipread'].add(node)
159 160 else:
160 161 # Side-effect: read content and verify hash.
161 162 rl.revision(node)
162 163
163 164
164 165 # True if a fast implementation for persistent-nodemap is available
165 166 #
166 167 # We also consider the "pure" python implementation "fast" because people
167 168 # using pure don't really have performance considerations (and a
168 169 # wheelbarrow of other slowness sources)
169 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 171 parsers, 'BaseIndexObject'
171 172 )
172 173
173 174
174 175 @attr.s(slots=True, frozen=True)
175 176 class _revisioninfo(object):
176 177 """Information about a revision that allows building its fulltext
177 178 node: expected hash of the revision
178 179 p1, p2: parent revs of the revision
179 180 btext: built text cache consisting of a one-element list
180 181 cachedelta: (baserev, uncompressed_delta) or None
181 182 flags: flags associated to the revision storage
182 183
183 184 One of btext[0] or cachedelta must be set.
184 185 """
185 186
186 187 node = attr.ib()
187 188 p1 = attr.ib()
188 189 p2 = attr.ib()
189 190 btext = attr.ib()
190 191 textlen = attr.ib()
191 192 cachedelta = attr.ib()
192 193 flags = attr.ib()
193 194
194 195
195 196 @interfaceutil.implementer(repository.irevisiondelta)
196 197 @attr.s(slots=True)
197 198 class revlogrevisiondelta(object):
198 199 node = attr.ib()
199 200 p1node = attr.ib()
200 201 p2node = attr.ib()
201 202 basenode = attr.ib()
202 203 flags = attr.ib()
203 204 baserevisionsize = attr.ib()
204 205 revision = attr.ib()
205 206 delta = attr.ib()
206 207 linknode = attr.ib(default=None)
207 208
208 209
209 210 @interfaceutil.implementer(repository.iverifyproblem)
210 211 @attr.s(frozen=True)
211 212 class revlogproblem(object):
212 213 warning = attr.ib(default=None)
213 214 error = attr.ib(default=None)
214 215 node = attr.ib(default=None)
215 216
216 217
217 218 # index v0:
218 219 # 4 bytes: offset
219 220 # 4 bytes: compressed length
220 221 # 4 bytes: base rev
221 222 # 4 bytes: link rev
222 223 # 20 bytes: parent 1 nodeid
223 224 # 20 bytes: parent 2 nodeid
224 225 # 20 bytes: nodeid
225 226 indexformatv0 = struct.Struct(b">4l20s20s20s")
226 227 indexformatv0_pack = indexformatv0.pack
227 228 indexformatv0_unpack = indexformatv0.unpack
228 229
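For reference, the v0 layout above totals 76 bytes per entry; a one-line check against the struct string:

    import struct
    assert struct.calcsize(b">4l20s20s20s") == 76  # 4*4 + 3*20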
229 230
230 231 class revlogoldindex(list):
231 232 @property
232 233 def nodemap(self):
233 234 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
234 235 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
235 236 return self._nodemap
236 237
237 238 @util.propertycache
238 239 def _nodemap(self):
239 240 nodemap = nodemaputil.NodeMap({nullid: nullrev})
240 241 for r in range(0, len(self)):
241 242 n = self[r][7]
242 243 nodemap[n] = r
243 244 return nodemap
244 245
245 246 def has_node(self, node):
246 247 """return True if the node exist in the index"""
247 248 """return True if the node exists in the index"""
248 249
249 250 def rev(self, node):
250 251 """return a revision for a node
251 252
252 253 If the node is unknown, raise a RevlogError"""
253 254 return self._nodemap[node]
254 255
255 256 def get_rev(self, node):
256 257 """return a revision for a node
257 258
258 259 If the node is unknown, return None"""
259 260 return self._nodemap.get(node)
260 261
261 262 def append(self, tup):
262 263 self._nodemap[tup[7]] = len(self)
263 264 super(revlogoldindex, self).append(tup)
264 265
265 266 def __delitem__(self, i):
266 267 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
267 268 raise ValueError(b"deleting slices only supports a:-1 with step 1")
268 269 for r in pycompat.xrange(i.start, len(self)):
269 270 del self._nodemap[self[r][7]]
270 271 super(revlogoldindex, self).__delitem__(i)
271 272
272 273 def clearcaches(self):
273 274 self.__dict__.pop('_nodemap', None)
274 275
275 276 def __getitem__(self, i):
276 277 if i == -1:
277 278 return (0, 0, 0, -1, -1, -1, -1, nullid)
278 279 return list.__getitem__(self, i)
279 280
280 281
281 282 class revlogoldio(object):
282 283 def __init__(self):
283 284 self.size = indexformatv0.size
284 285
285 286 def parseindex(self, data, inline):
286 287 s = self.size
287 288 index = []
288 289 nodemap = nodemaputil.NodeMap({nullid: nullrev})
289 290 n = off = 0
290 291 l = len(data)
291 292 while off + s <= l:
292 293 cur = data[off : off + s]
293 294 off += s
294 295 e = indexformatv0_unpack(cur)
295 296 # transform to revlogv1 format
296 297 e2 = (
297 298 offset_type(e[0], 0),
298 299 e[1],
299 300 -1,
300 301 e[2],
301 302 e[3],
302 303 nodemap.get(e[4], nullrev),
303 304 nodemap.get(e[5], nullrev),
304 305 e[6],
305 306 )
306 307 index.append(e2)
307 308 nodemap[e[6]] = n
308 309 n += 1
309 310
310 311 index = revlogoldindex(index)
311 312 return index, None
312 313
313 314 def packentry(self, entry, node, version, rev):
314 315 if gettype(entry[0]):
315 316 raise error.RevlogError(
316 317 _(b'index entry flags need revlog version 1')
317 318 )
318 319 e2 = (
319 320 getoffset(entry[0]),
320 321 entry[1],
321 322 entry[3],
322 323 entry[4],
323 324 node(entry[5]),
324 325 node(entry[6]),
325 326 entry[7],
326 327 )
327 328 return indexformatv0_pack(*e2)
328 329
329 330
330 331 # index ng:
331 332 # 6 bytes: offset
332 333 # 2 bytes: flags
333 334 # 4 bytes: compressed length
334 335 # 4 bytes: uncompressed length
335 336 # 4 bytes: base rev
336 337 # 4 bytes: link rev
337 338 # 4 bytes: parent 1 rev
338 339 # 4 bytes: parent 2 rev
339 340 # 32 bytes: nodeid
340 341 indexformatng = struct.Struct(b">Qiiiiii20s12x")
341 342 indexformatng_pack = indexformatng.pack
342 343 versionformat = struct.Struct(b">I")
343 344 versionformat_pack = versionformat.pack
344 345 versionformat_unpack = versionformat.unpack
345 346
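The v1 ("ng") entry is 64 bytes: an 8-byte offset/flags word, six 4-byte ints, a 20-byte nodeid and 12 bytes of padding. A quick check:

    import struct
    assert struct.calcsize(b">Qiiiiii20s12x") == 64  # 8 + 6*4 + 20 + 12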
346 347 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
347 348 # signed integer)
348 349 _maxentrysize = 0x7FFFFFFF
349 350
350 351
351 352 class revlogio(object):
352 353 def __init__(self):
353 354 self.size = indexformatng.size
354 355
355 356 def parseindex(self, data, inline):
356 357 # call the C implementation to parse the index data
357 358 index, cache = parsers.parse_index2(data, inline)
358 359 return index, cache
359 360
360 361 def packentry(self, entry, node, version, rev):
361 362 p = indexformatng_pack(*entry)
362 363 if rev == 0:
363 364 p = versionformat_pack(version) + p[4:]
364 365 return p
365 366
366 367
368 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
369 indexformatv2_pack = indexformatv2.pack
370
371
372 class revlogv2io(object):
373 def __init__(self):
374 self.size = indexformatv2.size
375
376 def parseindex(self, data, inline):
377 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
378 return index, cache
379
380 def packentry(self, entry, node, version, rev):
381 p = indexformatv2_pack(*entry)
382 if rev == 0:
383 p = versionformat_pack(version) + p[4:]
384 return p
385
386
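As with v1, `revlogv2io.packentry` splices the 4-byte version header over the start of the very first entry, which is why the index classes mask entry 0's offset when reading. A sketch (the version value is illustrative):

    import struct

    entry = (0, 0, 0, -1, -1, -1, -1, b"\0" * 20, 0, 0)
    packed = revlogv2io().packentry(entry, None, 0xDEAD, 0)
    assert packed[:4] == struct.pack(b">I", 0xDEAD)  # header over the first bytes
    assert len(packed) == 96                         # entry size is unchanged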
367 387 NodemapRevlogIO = None
368 388
369 389 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
370 390
371 391 class NodemapRevlogIO(revlogio):
372 392 """A debug-oriented IO class that returns a PersistentNodeMapIndexObject
373 393
374 394 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
375 395 """
376 396
377 397 def parseindex(self, data, inline):
378 398 index, cache = parsers.parse_index_devel_nodemap(data, inline)
379 399 return index, cache
380 400
381 401
382 402 class rustrevlogio(revlogio):
383 403 def parseindex(self, data, inline):
384 404 index, cache = super(rustrevlogio, self).parseindex(data, inline)
385 405 return rustrevlog.MixedIndex(index), cache
386 406
387 407
388 408 class revlog(object):
389 409 """
390 410 the underlying revision storage object
391 411
392 412 A revlog consists of two parts, an index and the revision data.
393 413
394 414 The index is a file with a fixed record size containing
395 415 information on each revision, including its nodeid (hash), the
396 416 nodeids of its parents, the position and offset of its data within
397 417 the data file, and the revision it's based on. Finally, each entry
398 418 contains a linkrev entry that can serve as a pointer to external
399 419 data.
400 420
401 421 The revision data itself is a linear collection of data chunks.
402 422 Each chunk represents a revision and is usually represented as a
403 423 delta against the previous chunk. To bound lookup time, runs of
404 424 deltas are limited to about 2 times the length of the original
405 425 version data. This makes retrieval of a version proportional to
406 426 its size, or O(1) relative to the number of revisions.
407 427
408 428 Both pieces of the revlog are written to in an append-only
409 429 fashion, which means we never need to rewrite a file to insert or
410 430 remove data, and can use some simple techniques to avoid the need
411 431 for locking while reading.
412 432
413 433 If checkambig, indexfile is opened with checkambig=True at
414 434 writing, to avoid file stat ambiguity.
415 435
416 436 If mmaplargeindex is True, and an mmapindexthreshold is set, the
417 437 index will be mmapped rather than read if it is larger than the
418 438 configured threshold.
419 439
420 440 If censorable is True, the revlog can have censored revisions.
421 441
422 442 If `upperboundcomp` is not None, this is the expected maximal gain from
423 443 compression for the data content.
424 444
425 445 `concurrencychecker` is an optional function that receives 3 arguments: a
426 446 file handle, a filename, and an expected position. It should check whether
427 447 the current position in the file handle is valid, and log/warn/fail (by
428 448 raising).
429 449 """
430 450
431 451 _flagserrorclass = error.RevlogError
432 452
433 453 def __init__(
434 454 self,
435 455 opener,
436 456 indexfile,
437 457 datafile=None,
438 458 checkambig=False,
439 459 mmaplargeindex=False,
440 460 censorable=False,
441 461 upperboundcomp=None,
442 462 persistentnodemap=False,
443 463 concurrencychecker=None,
444 464 ):
445 465 """
446 466 create a revlog object
447 467
448 468 opener is a function that abstracts the file opening operation
449 469 and can be used to implement COW semantics or the like.
450 470
451 471 """
452 472 self.upperboundcomp = upperboundcomp
453 473 self.indexfile = indexfile
454 474 self.datafile = datafile or (indexfile[:-2] + b".d")
455 475 self.nodemap_file = None
456 476 if persistentnodemap:
457 477 self.nodemap_file = nodemaputil.get_nodemap_file(
458 478 opener, self.indexfile
459 479 )
460 480
461 481 self.opener = opener
462 482 # When True, indexfile is opened with checkambig=True at writing, to
463 483 # avoid file stat ambiguity.
464 484 self._checkambig = checkambig
465 485 self._mmaplargeindex = mmaplargeindex
466 486 self._censorable = censorable
467 487 # 3-tuple of (node, rev, text) for a raw revision.
468 488 self._revisioncache = None
469 489 # Maps rev to chain base rev.
470 490 self._chainbasecache = util.lrucachedict(100)
471 491 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
472 492 self._chunkcache = (0, b'')
473 493 # How much data to read and cache into the raw revlog data cache.
474 494 self._chunkcachesize = 65536
475 495 self._maxchainlen = None
476 496 self._deltabothparents = True
477 497 self.index = None
478 498 self._nodemap_docket = None
479 499 # Mapping of partial identifiers to full nodes.
480 500 self._pcache = {}
481 501 # Mapping of revision integer to full node.
482 502 self._compengine = b'zlib'
483 503 self._compengineopts = {}
484 504 self._maxdeltachainspan = -1
485 505 self._withsparseread = False
486 506 self._sparserevlog = False
487 507 self._srdensitythreshold = 0.50
488 508 self._srmingapsize = 262144
489 509
490 510 # Make copy of flag processors so each revlog instance can support
491 511 # custom flags.
492 512 self._flagprocessors = dict(flagutil.flagprocessors)
493 513
494 514 # 2-tuple of file handles being used for active writing.
495 515 self._writinghandles = None
496 516
497 517 self._loadindex()
498 518
499 519 self._concurrencychecker = concurrencychecker
500 520
501 521 def _loadindex(self):
502 522 mmapindexthreshold = None
503 523 opts = self.opener.options
504 524
505 525 if b'revlogv2' in opts:
506 526 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
507 527 elif b'revlogv1' in opts:
508 528 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
509 529 if b'generaldelta' in opts:
510 530 newversionflags |= FLAG_GENERALDELTA
511 531 elif b'revlogv0' in self.opener.options:
512 532 newversionflags = REVLOGV0
513 533 else:
514 534 newversionflags = REVLOG_DEFAULT_VERSION
515 535
516 536 if b'chunkcachesize' in opts:
517 537 self._chunkcachesize = opts[b'chunkcachesize']
518 538 if b'maxchainlen' in opts:
519 539 self._maxchainlen = opts[b'maxchainlen']
520 540 if b'deltabothparents' in opts:
521 541 self._deltabothparents = opts[b'deltabothparents']
522 542 self._lazydelta = bool(opts.get(b'lazydelta', True))
523 543 self._lazydeltabase = False
524 544 if self._lazydelta:
525 545 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
526 546 if b'compengine' in opts:
527 547 self._compengine = opts[b'compengine']
528 548 if b'zlib.level' in opts:
529 549 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
530 550 if b'zstd.level' in opts:
531 551 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
532 552 if b'maxdeltachainspan' in opts:
533 553 self._maxdeltachainspan = opts[b'maxdeltachainspan']
534 554 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
535 555 mmapindexthreshold = opts[b'mmapindexthreshold']
536 556 self.hassidedata = bool(opts.get(b'side-data', False))
537 557 if self.hassidedata:
538 558 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
539 559 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
540 560 withsparseread = bool(opts.get(b'with-sparse-read', False))
541 561 # sparse-revlog forces sparse-read
542 562 self._withsparseread = self._sparserevlog or withsparseread
543 563 if b'sparse-read-density-threshold' in opts:
544 564 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
545 565 if b'sparse-read-min-gap-size' in opts:
546 566 self._srmingapsize = opts[b'sparse-read-min-gap-size']
547 567 if opts.get(b'enableellipsis'):
548 568 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
549 569
550 570 # revlog v0 doesn't have flag processors
551 571 for flag, processor in pycompat.iteritems(
552 572 opts.get(b'flagprocessors', {})
553 573 ):
554 574 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
555 575
556 576 if self._chunkcachesize <= 0:
557 577 raise error.RevlogError(
558 578 _(b'revlog chunk cache size %r is not greater than 0')
559 579 % self._chunkcachesize
560 580 )
561 581 elif self._chunkcachesize & (self._chunkcachesize - 1):
562 582 raise error.RevlogError(
563 583 _(b'revlog chunk cache size %r is not a power of 2')
564 584 % self._chunkcachesize
565 585 )
566 586
567 587 indexdata = b''
568 588 self._initempty = True
569 589 try:
570 590 with self._indexfp() as f:
571 591 if (
572 592 mmapindexthreshold is not None
573 593 and self.opener.fstat(f).st_size >= mmapindexthreshold
574 594 ):
575 595 # TODO: should .close() to release resources without
576 596 # relying on Python GC
577 597 indexdata = util.buffer(util.mmapread(f))
578 598 else:
579 599 indexdata = f.read()
580 600 if len(indexdata) > 0:
581 601 versionflags = versionformat_unpack(indexdata[:4])[0]
582 602 self._initempty = False
583 603 else:
584 604 versionflags = newversionflags
585 605 except IOError as inst:
586 606 if inst.errno != errno.ENOENT:
587 607 raise
588 608
589 609 versionflags = newversionflags
590 610
591 611 self.version = versionflags
592 612
593 613 flags = versionflags & ~0xFFFF
594 614 fmt = versionflags & 0xFFFF
595 615
596 616 if fmt == REVLOGV0:
597 617 if flags:
598 618 raise error.RevlogError(
599 619 _(b'unknown flags (%#04x) in version %d revlog %s')
600 620 % (flags >> 16, fmt, self.indexfile)
601 621 )
602 622
603 623 self._inline = False
604 624 self._generaldelta = False
605 625
606 626 elif fmt == REVLOGV1:
607 627 if flags & ~REVLOGV1_FLAGS:
608 628 raise error.RevlogError(
609 629 _(b'unknown flags (%#04x) in version %d revlog %s')
610 630 % (flags >> 16, fmt, self.indexfile)
611 631 )
612 632
613 633 self._inline = versionflags & FLAG_INLINE_DATA
614 634 self._generaldelta = versionflags & FLAG_GENERALDELTA
615 635
616 636 elif fmt == REVLOGV2:
617 637 if flags & ~REVLOGV2_FLAGS:
618 638 raise error.RevlogError(
619 639 _(b'unknown flags (%#04x) in version %d revlog %s')
620 640 % (flags >> 16, fmt, self.indexfile)
621 641 )
622 642
623 643 self._inline = versionflags & FLAG_INLINE_DATA
624 644 # generaldelta implied by version 2 revlogs.
625 645 self._generaldelta = True
626 646
627 647 else:
628 648 raise error.RevlogError(
629 649 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
630 650 )
631 651 # sparse-revlog can't be on without general-delta (issue6056)
632 652 if not self._generaldelta:
633 653 self._sparserevlog = False
634 654
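The version detection above splits the 4-byte header into a 16-bit format number (the low bits) and feature flags (everything above them). A sketch, assuming FLAG_INLINE_DATA is 1 << 16 as the masking here implies:

    versionflags = (1 << 16) | 1    # FLAG_INLINE_DATA | REVLOGV1, say
    fmt = versionflags & 0xFFFF     # -> 1, the revlog format number
    flags = versionflags & ~0xFFFF  # -> the feature-flag bits
    assert fmt == 1 and flags == (1 << 16)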
635 655 self._storedeltachains = True
636 656
637 657 devel_nodemap = (
638 658 self.nodemap_file
639 659 and opts.get(b'devel-force-nodemap', False)
640 660 and NodemapRevlogIO is not None
641 661 )
642 662
643 663 use_rust_index = False
644 664 if rustrevlog is not None:
645 665 if self.nodemap_file is not None:
646 666 use_rust_index = True
647 667 else:
648 668 use_rust_index = self.opener.options.get(b'rust.index')
649 669
650 670 self._io = revlogio()
651 671 if self.version == REVLOGV0:
652 672 self._io = revlogoldio()
673 elif fmt == REVLOGV2:
674 self._io = revlogv2io()
653 675 elif devel_nodemap:
654 676 self._io = NodemapRevlogIO()
655 677 elif use_rust_index:
656 678 self._io = rustrevlogio()
657 679 try:
658 680 d = self._io.parseindex(indexdata, self._inline)
659 681 index, _chunkcache = d
660 682 use_nodemap = (
661 683 not self._inline
662 684 and self.nodemap_file is not None
663 685 and util.safehasattr(index, 'update_nodemap_data')
664 686 )
665 687 if use_nodemap:
666 688 nodemap_data = nodemaputil.persisted_data(self)
667 689 if nodemap_data is not None:
668 690 docket = nodemap_data[0]
669 691 if (
670 692 len(d[0]) > docket.tip_rev
671 693 and d[0][docket.tip_rev][7] == docket.tip_node
672 694 ):
673 695 # no changelog tampering
674 696 self._nodemap_docket = docket
675 697 index.update_nodemap_data(*nodemap_data)
676 698 except (ValueError, IndexError):
677 699 raise error.RevlogError(
678 700 _(b"index %s is corrupted") % self.indexfile
679 701 )
680 702 self.index, self._chunkcache = d
681 703 if not self._chunkcache:
682 704 self._chunkclear()
683 705 # revnum -> (chain-length, sum-delta-length)
684 706 self._chaininfocache = util.lrucachedict(500)
685 707 # revlog header -> revlog compressor
686 708 self._decompressors = {}
687 709
688 710 @util.propertycache
689 711 def _compressor(self):
690 712 engine = util.compengines[self._compengine]
691 713 return engine.revlogcompressor(self._compengineopts)
692 714
693 715 def _indexfp(self, mode=b'r'):
694 716 """file object for the revlog's index file"""
695 717 args = {'mode': mode}
696 718 if mode != b'r':
697 719 args['checkambig'] = self._checkambig
698 720 if mode == b'w':
699 721 args['atomictemp'] = True
700 722 return self.opener(self.indexfile, **args)
701 723
702 724 def _datafp(self, mode=b'r'):
703 725 """file object for the revlog's data file"""
704 726 return self.opener(self.datafile, mode=mode)
705 727
706 728 @contextlib.contextmanager
707 729 def _datareadfp(self, existingfp=None):
708 730 """file object suitable to read data"""
709 731 # Use explicit file handle, if given.
710 732 if existingfp is not None:
711 733 yield existingfp
712 734
713 735 # Use a file handle being actively used for writes, if available.
714 736 # There is some danger to doing this because reads will seek the
715 737 # file. However, _writeentry() performs a SEEK_END before all writes,
716 738 # so we should be safe.
717 739 elif self._writinghandles:
718 740 if self._inline:
719 741 yield self._writinghandles[0]
720 742 else:
721 743 yield self._writinghandles[1]
722 744
723 745 # Otherwise open a new file handle.
724 746 else:
725 747 if self._inline:
726 748 func = self._indexfp
727 749 else:
728 750 func = self._datafp
729 751 with func() as fp:
730 752 yield fp
731 753
732 754 def tiprev(self):
733 755 return len(self.index) - 1
734 756
735 757 def tip(self):
736 758 return self.node(self.tiprev())
737 759
738 760 def __contains__(self, rev):
739 761 return 0 <= rev < len(self)
740 762
741 763 def __len__(self):
742 764 return len(self.index)
743 765
744 766 def __iter__(self):
745 767 return iter(pycompat.xrange(len(self)))
746 768
747 769 def revs(self, start=0, stop=None):
748 770 """iterate over all revs in this revlog (from start to stop)"""
749 771 return storageutil.iterrevs(len(self), start=start, stop=stop)
750 772
751 773 @property
752 774 def nodemap(self):
753 775 msg = (
754 776 b"revlog.nodemap is deprecated, "
755 777 b"use revlog.index.[has_node|rev|get_rev]"
756 778 )
757 779 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
758 780 return self.index.nodemap
759 781
760 782 @property
761 783 def _nodecache(self):
762 784 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
763 785 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
764 786 return self.index.nodemap
765 787
766 788 def hasnode(self, node):
767 789 try:
768 790 self.rev(node)
769 791 return True
770 792 except KeyError:
771 793 return False
772 794
773 795 def candelta(self, baserev, rev):
774 796 """whether two revisions (baserev, rev) can be delta-ed or not"""
775 797 # Disable delta if either rev requires a content-changing flag
776 798 # processor (ex. LFS). This is because such flag processor can alter
777 799 # the rawtext content that the delta will be based on, and two clients
778 800 # could have a same revlog node with different flags (i.e. different
779 801 # rawtext contents) and the delta could be incompatible.
780 802 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
781 803 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
782 804 ):
783 805 return False
784 806 return True
785 807
786 808 def update_caches(self, transaction):
787 809 if self.nodemap_file is not None:
788 810 if transaction is None:
789 811 nodemaputil.update_persistent_nodemap(self)
790 812 else:
791 813 nodemaputil.setup_persistent_nodemap(transaction, self)
792 814
793 815 def clearcaches(self):
794 816 self._revisioncache = None
795 817 self._chainbasecache.clear()
796 818 self._chunkcache = (0, b'')
797 819 self._pcache = {}
798 820 self._nodemap_docket = None
799 821 self.index.clearcaches()
800 822 # The python code is the one responsible for validating the docket, so we
801 823 # end up having to refresh it here.
802 824 use_nodemap = (
803 825 not self._inline
804 826 and self.nodemap_file is not None
805 827 and util.safehasattr(self.index, 'update_nodemap_data')
806 828 )
807 829 if use_nodemap:
808 830 nodemap_data = nodemaputil.persisted_data(self)
809 831 if nodemap_data is not None:
810 832 self._nodemap_docket = nodemap_data[0]
811 833 self.index.update_nodemap_data(*nodemap_data)
812 834
813 835 def rev(self, node):
814 836 try:
815 837 return self.index.rev(node)
816 838 except TypeError:
817 839 raise
818 840 except error.RevlogError:
819 841 # parsers.c radix tree lookup failed
820 842 if node == wdirid or node in wdirfilenodeids:
821 843 raise error.WdirUnsupported
822 844 raise error.LookupError(node, self.indexfile, _(b'no node'))
823 845
824 846 # Accessors for index entries.
825 847
826 848 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
827 849 # are flags.
828 850 def start(self, rev):
829 851 return int(self.index[rev][0] >> 16)
830 852
831 853 def flags(self, rev):
832 854 return self.index[rev][0] & 0xFFFF
833 855
834 856 def length(self, rev):
835 857 return self.index[rev][1]
836 858
837 859 def rawsize(self, rev):
838 860 """return the length of the uncompressed text for a given revision"""
839 861 l = self.index[rev][2]
840 862 if l >= 0:
841 863 return l
842 864
843 865 t = self.rawdata(rev)
844 866 return len(t)
845 867
846 868 def size(self, rev):
847 869 """length of non-raw text (processed by a "read" flag processor)"""
848 870 # fast path: if no "read" flag processor could change the content,
849 871 # size is rawsize. note: ELLIPSIS is known to not change the content.
850 872 flags = self.flags(rev)
851 873 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
852 874 return self.rawsize(rev)
853 875
854 876 return len(self.revision(rev, raw=False))
855 877
856 878 def chainbase(self, rev):
857 879 base = self._chainbasecache.get(rev)
858 880 if base is not None:
859 881 return base
860 882
861 883 index = self.index
862 884 iterrev = rev
863 885 base = index[iterrev][3]
864 886 while base != iterrev:
865 887 iterrev = base
866 888 base = index[iterrev][3]
867 889
868 890 self._chainbasecache[rev] = base
869 891 return base
870 892
871 893 def linkrev(self, rev):
872 894 return self.index[rev][4]
873 895
874 896 def parentrevs(self, rev):
875 897 try:
876 898 entry = self.index[rev]
877 899 except IndexError:
878 900 if rev == wdirrev:
879 901 raise error.WdirUnsupported
880 902 raise
881 903
882 904 return entry[5], entry[6]
883 905
884 906 # fast parentrevs(rev) where rev isn't filtered
885 907 _uncheckedparentrevs = parentrevs
886 908
887 909 def node(self, rev):
888 910 try:
889 911 return self.index[rev][7]
890 912 except IndexError:
891 913 if rev == wdirrev:
892 914 raise error.WdirUnsupported
893 915 raise
894 916
895 917 # Derived from index values.
896 918
897 919 def end(self, rev):
898 920 return self.start(rev) + self.length(rev)
899 921
900 922 def parents(self, node):
901 923 i = self.index
902 924 d = i[self.rev(node)]
903 925 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
904 926
905 927 def chainlen(self, rev):
906 928 return self._chaininfo(rev)[0]
907 929
908 930 def _chaininfo(self, rev):
909 931 chaininfocache = self._chaininfocache
910 932 if rev in chaininfocache:
911 933 return chaininfocache[rev]
912 934 index = self.index
913 935 generaldelta = self._generaldelta
914 936 iterrev = rev
915 937 e = index[iterrev]
916 938 clen = 0
917 939 compresseddeltalen = 0
918 940 while iterrev != e[3]:
919 941 clen += 1
920 942 compresseddeltalen += e[1]
921 943 if generaldelta:
922 944 iterrev = e[3]
923 945 else:
924 946 iterrev -= 1
925 947 if iterrev in chaininfocache:
926 948 t = chaininfocache[iterrev]
927 949 clen += t[0]
928 950 compresseddeltalen += t[1]
929 951 break
930 952 e = index[iterrev]
931 953 else:
932 954 # Add text length of base since decompressing that also takes
933 955 # work. For cache hits the length is already included.
934 956 compresseddeltalen += e[1]
935 957 r = (clen, compresseddeltalen)
936 958 chaininfocache[rev] = r
937 959 return r
938 960
939 961 def _deltachain(self, rev, stoprev=None):
940 962 """Obtain the delta chain for a revision.
941 963
942 964 ``stoprev`` specifies a revision to stop at. If not specified, we
943 965 stop at the base of the chain.
944 966
945 967 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
946 968 revs in ascending order and ``stopped`` is a bool indicating whether
947 969 ``stoprev`` was hit.
948 970 """
949 971 # Try C implementation.
950 972 try:
951 973 return self.index.deltachain(rev, stoprev, self._generaldelta)
952 974 except AttributeError:
953 975 pass
954 976
955 977 chain = []
956 978
957 979 # Alias to prevent attribute lookup in tight loop.
958 980 index = self.index
959 981 generaldelta = self._generaldelta
960 982
961 983 iterrev = rev
962 984 e = index[iterrev]
963 985 while iterrev != e[3] and iterrev != stoprev:
964 986 chain.append(iterrev)
965 987 if generaldelta:
966 988 iterrev = e[3]
967 989 else:
968 990 iterrev -= 1
969 991 e = index[iterrev]
970 992
971 993 if iterrev == stoprev:
972 994 stopped = True
973 995 else:
974 996 chain.append(iterrev)
975 997 stopped = False
976 998
977 999 chain.reverse()
978 1000 return chain, stopped
979 1001
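The pure-Python fallback above is easy to mirror by hand; a minimal sketch (not revlog's API) of the generaldelta case, where each entry's base rev lives at index position 3:

    def chain_of(index, rev):
        """Collect rev's delta chain, base first (generaldelta layout)."""
        chain = []
        while rev != index[rev][3]:  # an entry that is its own base ends the chain
            chain.append(rev)
            rev = index[rev][3]
        chain.append(rev)
        return chain[::-1]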
980 1002 def ancestors(self, revs, stoprev=0, inclusive=False):
981 1003 """Generate the ancestors of 'revs' in reverse revision order.
982 1004 Does not generate revs lower than stoprev.
983 1005
984 1006 See the documentation for ancestor.lazyancestors for more details."""
985 1007
986 1008 # first, make sure start revisions aren't filtered
987 1009 revs = list(revs)
988 1010 checkrev = self.node
989 1011 for r in revs:
990 1012 checkrev(r)
991 1013 # and we're sure ancestors aren't filtered as well
992 1014
993 1015 if rustancestor is not None:
994 1016 lazyancestors = rustancestor.LazyAncestors
995 1017 arg = self.index
996 1018 else:
997 1019 lazyancestors = ancestor.lazyancestors
998 1020 arg = self._uncheckedparentrevs
999 1021 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1000 1022
1001 1023 def descendants(self, revs):
1002 1024 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1003 1025
1004 1026 def findcommonmissing(self, common=None, heads=None):
1005 1027 """Return a tuple of the ancestors of common and the ancestors of heads
1006 1028 that are not ancestors of common. In revset terminology, we return the
1007 1029 tuple:
1008 1030
1009 1031 ::common, (::heads) - (::common)
1010 1032
1011 1033 The list is sorted by revision number, meaning it is
1012 1034 topologically sorted.
1013 1035
1014 1036 'heads' and 'common' are both lists of node IDs. If heads is
1015 1037 not supplied, uses all of the revlog's heads. If common is not
1016 1038 supplied, uses nullid."""
1017 1039 if common is None:
1018 1040 common = [nullid]
1019 1041 if heads is None:
1020 1042 heads = self.heads()
1021 1043
1022 1044 common = [self.rev(n) for n in common]
1023 1045 heads = [self.rev(n) for n in heads]
1024 1046
1025 1047 # we want the ancestors, but inclusive
1026 1048 class lazyset(object):
1027 1049 def __init__(self, lazyvalues):
1028 1050 self.addedvalues = set()
1029 1051 self.lazyvalues = lazyvalues
1030 1052
1031 1053 def __contains__(self, value):
1032 1054 return value in self.addedvalues or value in self.lazyvalues
1033 1055
1034 1056 def __iter__(self):
1035 1057 added = self.addedvalues
1036 1058 for r in added:
1037 1059 yield r
1038 1060 for r in self.lazyvalues:
1039 1061 if not r in added:
1040 1062 yield r
1041 1063
1042 1064 def add(self, value):
1043 1065 self.addedvalues.add(value)
1044 1066
1045 1067 def update(self, values):
1046 1068 self.addedvalues.update(values)
1047 1069
1048 1070 has = lazyset(self.ancestors(common))
1049 1071 has.add(nullrev)
1050 1072 has.update(common)
1051 1073
1052 1074 # take all ancestors from heads that aren't in has
1053 1075 missing = set()
1054 1076 visit = collections.deque(r for r in heads if r not in has)
1055 1077 while visit:
1056 1078 r = visit.popleft()
1057 1079 if r in missing:
1058 1080 continue
1059 1081 else:
1060 1082 missing.add(r)
1061 1083 for p in self.parentrevs(r):
1062 1084 if p not in has:
1063 1085 visit.append(p)
1064 1086 missing = list(missing)
1065 1087 missing.sort()
1066 1088 return has, [self.node(miss) for miss in missing]
1067 1089
1068 1090 def incrementalmissingrevs(self, common=None):
1069 1091 """Return an object that can be used to incrementally compute the
1070 1092 revision numbers of the ancestors of arbitrary sets that are not
1071 1093 ancestors of common. This is an ancestor.incrementalmissingancestors
1072 1094 object.
1073 1095
1074 1096 'common' is a list of revision numbers. If common is not supplied, uses
1075 1097 nullrev.
1076 1098 """
1077 1099 if common is None:
1078 1100 common = [nullrev]
1079 1101
1080 1102 if rustancestor is not None:
1081 1103 return rustancestor.MissingAncestors(self.index, common)
1082 1104 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1083 1105
1084 1106 def findmissingrevs(self, common=None, heads=None):
1085 1107 """Return the revision numbers of the ancestors of heads that
1086 1108 are not ancestors of common.
1087 1109
1088 1110 More specifically, return a list of revision numbers corresponding to
1089 1111 nodes N such that every N satisfies the following constraints:
1090 1112
1091 1113 1. N is an ancestor of some node in 'heads'
1092 1114 2. N is not an ancestor of any node in 'common'
1093 1115
1094 1116 The list is sorted by revision number, meaning it is
1095 1117 topologically sorted.
1096 1118
1097 1119 'heads' and 'common' are both lists of revision numbers. If heads is
1098 1120 not supplied, uses all of the revlog's heads. If common is not
1099 1121 supplied, uses nullid."""
1100 1122 if common is None:
1101 1123 common = [nullrev]
1102 1124 if heads is None:
1103 1125 heads = self.headrevs()
1104 1126
1105 1127 inc = self.incrementalmissingrevs(common=common)
1106 1128 return inc.missingancestors(heads)
1107 1129
1108 1130 def findmissing(self, common=None, heads=None):
1109 1131 """Return the ancestors of heads that are not ancestors of common.
1110 1132
1111 1133 More specifically, return a list of nodes N such that every N
1112 1134 satisfies the following constraints:
1113 1135
1114 1136 1. N is an ancestor of some node in 'heads'
1115 1137 2. N is not an ancestor of any node in 'common'
1116 1138
1117 1139 The list is sorted by revision number, meaning it is
1118 1140 topologically sorted.
1119 1141
1120 1142 'heads' and 'common' are both lists of node IDs. If heads is
1121 1143 not supplied, uses all of the revlog's heads. If common is not
1122 1144 supplied, uses nullid."""
1123 1145 if common is None:
1124 1146 common = [nullid]
1125 1147 if heads is None:
1126 1148 heads = self.heads()
1127 1149
1128 1150 common = [self.rev(n) for n in common]
1129 1151 heads = [self.rev(n) for n in heads]
1130 1152
1131 1153 inc = self.incrementalmissingrevs(common=common)
1132 1154 return [self.node(r) for r in inc.missingancestors(heads)]
1133 1155
1134 1156 def nodesbetween(self, roots=None, heads=None):
1135 1157 """Return a topological path from 'roots' to 'heads'.
1136 1158
1137 1159 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1138 1160 topologically sorted list of all nodes N that satisfy both of
1139 1161 these constraints:
1140 1162
1141 1163 1. N is a descendant of some node in 'roots'
1142 1164 2. N is an ancestor of some node in 'heads'
1143 1165
1144 1166 Every node is considered to be both a descendant and an ancestor
1145 1167 of itself, so every reachable node in 'roots' and 'heads' will be
1146 1168 included in 'nodes'.
1147 1169
1148 1170 'outroots' is the list of reachable nodes in 'roots', i.e., the
1149 1171 subset of 'roots' that is returned in 'nodes'. Likewise,
1150 1172 'outheads' is the subset of 'heads' that is also in 'nodes'.
1151 1173
1152 1174 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1153 1175 unspecified, uses nullid as the only root. If 'heads' is
1154 1176 unspecified, uses list of all of the revlog's heads."""
1155 1177 nonodes = ([], [], [])
1156 1178 if roots is not None:
1157 1179 roots = list(roots)
1158 1180 if not roots:
1159 1181 return nonodes
1160 1182 lowestrev = min([self.rev(n) for n in roots])
1161 1183 else:
1162 1184 roots = [nullid] # Everybody's a descendant of nullid
1163 1185 lowestrev = nullrev
1164 1186 if (lowestrev == nullrev) and (heads is None):
1165 1187 # We want _all_ the nodes!
1166 1188 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1167 1189 if heads is None:
1168 1190 # All nodes are ancestors, so the latest ancestor is the last
1169 1191 # node.
1170 1192 highestrev = len(self) - 1
1171 1193 # Set ancestors to None to signal that every node is an ancestor.
1172 1194 ancestors = None
1173 1195 # Set heads to an empty dictionary for later discovery of heads
1174 1196 heads = {}
1175 1197 else:
1176 1198 heads = list(heads)
1177 1199 if not heads:
1178 1200 return nonodes
1179 1201 ancestors = set()
1180 1202 # Turn heads into a dictionary so we can remove 'fake' heads.
1181 1203 # Also, later we will be using it to filter out the heads we can't
1182 1204 # find from roots.
1183 1205 heads = dict.fromkeys(heads, False)
1184 1206 # Start at the top and keep marking parents until we're done.
1185 1207 nodestotag = set(heads)
1186 1208 # Remember where the top was so we can use it as a limit later.
1187 1209 highestrev = max([self.rev(n) for n in nodestotag])
1188 1210 while nodestotag:
1189 1211 # grab a node to tag
1190 1212 n = nodestotag.pop()
1191 1213 # Never tag nullid
1192 1214 if n == nullid:
1193 1215 continue
1194 1216 # A node's revision number represents its place in a
1195 1217 # topologically sorted list of nodes.
1196 1218 r = self.rev(n)
1197 1219 if r >= lowestrev:
1198 1220 if n not in ancestors:
1199 1221 # If we are possibly a descendant of one of the roots
1200 1222 # and we haven't already been marked as an ancestor
1201 1223 ancestors.add(n) # Mark as ancestor
1202 1224 # Add non-nullid parents to list of nodes to tag.
1203 1225 nodestotag.update(
1204 1226 [p for p in self.parents(n) if p != nullid]
1205 1227 )
1206 1228 elif n in heads: # We've seen it before, is it a fake head?
1207 1229 # So it is, real heads should not be the ancestors of
1208 1230 # any other heads.
1209 1231 heads.pop(n)
1210 1232 if not ancestors:
1211 1233 return nonodes
1212 1234 # Now that we have our set of ancestors, we want to remove any
1213 1235 # roots that are not ancestors.
1214 1236
1215 1237 # If one of the roots was nullid, everything is included anyway.
1216 1238 if lowestrev > nullrev:
1217 1239 # But, since we weren't, let's recompute the lowest rev to not
1218 1240 # include roots that aren't ancestors.
1219 1241
1220 1242 # Filter out roots that aren't ancestors of heads
1221 1243 roots = [root for root in roots if root in ancestors]
1222 1244 # Recompute the lowest revision
1223 1245 if roots:
1224 1246 lowestrev = min([self.rev(root) for root in roots])
1225 1247 else:
1226 1248 # No more roots? Return empty list
1227 1249 return nonodes
1228 1250 else:
1229 1251 # We are descending from nullid, and don't need to care about
1230 1252 # any other roots.
1231 1253 lowestrev = nullrev
1232 1254 roots = [nullid]
1233 1255 # Transform our roots list into a set.
1234 1256 descendants = set(roots)
1235 1257 # Also, keep the original roots so we can filter out roots that aren't
1236 1258 # 'real' roots (i.e. are descended from other roots).
1237 1259 roots = descendants.copy()
1238 1260 # Our topologically sorted list of output nodes.
1239 1261 orderedout = []
1240 1262 # Don't start at nullid since we don't want nullid in our output list,
1241 1263 # and if nullid shows up in descendants, empty parents will look like
1242 1264 # they're descendants.
1243 1265 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1244 1266 n = self.node(r)
1245 1267 isdescendant = False
1246 1268 if lowestrev == nullrev: # Everybody is a descendant of nullid
1247 1269 isdescendant = True
1248 1270 elif n in descendants:
1249 1271 # n is already a descendant
1250 1272 isdescendant = True
1251 1273 # This check only needs to be done here because all the roots
1252 1274 # will start being marked as descendants before the loop.
1253 1275 if n in roots:
1254 1276 # If n was a root, check if it's a 'real' root.
1255 1277 p = tuple(self.parents(n))
1256 1278 # If any of its parents are descendants, it's not a root.
1257 1279 if (p[0] in descendants) or (p[1] in descendants):
1258 1280 roots.remove(n)
1259 1281 else:
1260 1282 p = tuple(self.parents(n))
1261 1283 # A node is a descendant if either of its parents are
1262 1284 # descendants. (We seeded the descendants set with the roots
1263 1285 # up there, remember?)
1264 1286 if (p[0] in descendants) or (p[1] in descendants):
1265 1287 descendants.add(n)
1266 1288 isdescendant = True
1267 1289 if isdescendant and ((ancestors is None) or (n in ancestors)):
1268 1290 # Only include nodes that are both descendants and ancestors.
1269 1291 orderedout.append(n)
1270 1292 if (ancestors is not None) and (n in heads):
1271 1293 # We're trying to figure out which heads are reachable
1272 1294 # from roots.
1273 1295 # Mark this head as having been reached
1274 1296 heads[n] = True
1275 1297 elif ancestors is None:
1276 1298 # Otherwise, we're trying to discover the heads.
1277 1299 # Assume this is a head because if it isn't, the next step
1278 1300 # will eventually remove it.
1279 1301 heads[n] = True
1280 1302 # But, obviously its parents aren't.
1281 1303 for p in self.parents(n):
1282 1304 heads.pop(p, None)
1283 1305 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1284 1306 roots = list(roots)
1285 1307 assert orderedout
1286 1308 assert roots
1287 1309 assert heads
1288 1310 return (orderedout, roots, heads)
1289 1311
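As an aside, the two constraints in the docstring above amount to intersecting a forward closure from 'roots' with a backward closure from 'heads'. A minimal standalone sketch on a toy DAG; the `parents` table and node names below are hypothetical, not revlog data:

# Toy DAG: a <- b <- {c, d}. nodesbetween([a], [c]) should yield a, b, c.
parents = {b'a': [], b'b': [b'a'], b'c': [b'b'], b'd': [b'b']}

children = {}
for node, ps in parents.items():
    for p in ps:
        children.setdefault(p, []).append(node)

def closure(start, step):
    # generic reachability: everything reachable from `start` via `step`
    seen, stack = set(start), list(start)
    while stack:
        for nxt in step(stack.pop()):
            if nxt not in seen:
                seen.add(nxt)
                stack.append(nxt)
    return seen

descendants = closure([b'a'], lambda n: children.get(n, []))
ancestors = closure([b'c'], lambda n: parents[n])
print(sorted(descendants & ancestors))  # [b'a', b'b', b'c']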
1290 1312 def headrevs(self, revs=None):
1291 1313 if revs is None:
1292 1314 try:
1293 1315 return self.index.headrevs()
1294 1316 except AttributeError:
1295 1317 return self._headrevs()
1296 1318 if rustdagop is not None:
1297 1319 return rustdagop.headrevs(self.index, revs)
1298 1320 return dagop.headrevs(revs, self._uncheckedparentrevs)
1299 1321
1300 1322 def computephases(self, roots):
1301 1323 return self.index.computephasesmapsets(roots)
1302 1324
1303 1325 def _headrevs(self):
1304 1326 count = len(self)
1305 1327 if not count:
1306 1328 return [nullrev]
1307 1329 # we won't iterate over filtered revs, so nobody is a head at the start
1308 1330 ishead = [0] * (count + 1)
1309 1331 index = self.index
1310 1332 for r in self:
1311 1333 ishead[r] = 1 # I may be a head
1312 1334 e = index[r]
1313 1335 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1314 1336 return [r for r, val in enumerate(ishead) if val]
1315 1337
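The loop in `_headrevs` uses a simple marking trick: presume every revision is a head, then clear the mark of anything that appears as a parent. A standalone sketch of the same idea over a toy parent table (nullrev spelled as -1):

# parentrevs[r] = (p1, p2); revisions 2 and 3 are the heads here.
parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1)]

ishead = [1] * len(parentrevs)      # everyone may be a head...
for p1, p2 in parentrevs:
    for p in (p1, p2):
        if p != -1:
            ishead[p] = 0           # ...but parents are not
print([r for r, flag in enumerate(ishead) if flag])  # [2, 3]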
1316 1338 def heads(self, start=None, stop=None):
1317 1339 """return the list of all nodes that have no children
1318 1340
1319 1341 if start is specified, only heads that are descendants of
1320 1342 start will be returned
1321 1343 if stop is specified, it will consider all the revs from stop
1322 1344 as if they had no children
1323 1345 """
1324 1346 if start is None and stop is None:
1325 1347 if not len(self):
1326 1348 return [nullid]
1327 1349 return [self.node(r) for r in self.headrevs()]
1328 1350
1329 1351 if start is None:
1330 1352 start = nullrev
1331 1353 else:
1332 1354 start = self.rev(start)
1333 1355
1334 1356 stoprevs = {self.rev(n) for n in stop or []}
1335 1357
1336 1358 revs = dagop.headrevssubset(
1337 1359 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1338 1360 )
1339 1361
1340 1362 return [self.node(rev) for rev in revs]
1341 1363
1342 1364 def children(self, node):
1343 1365 """find the children of a given node"""
1344 1366 c = []
1345 1367 p = self.rev(node)
1346 1368 for r in self.revs(start=p + 1):
1347 1369 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1348 1370 if prevs:
1349 1371 for pr in prevs:
1350 1372 if pr == p:
1351 1373 c.append(self.node(r))
1352 1374 elif p == nullrev:
1353 1375 c.append(self.node(r))
1354 1376 return c
1355 1377
1356 1378 def commonancestorsheads(self, a, b):
1357 1379 """calculate all the heads of the common ancestors of nodes a and b"""
1358 1380 a, b = self.rev(a), self.rev(b)
1359 1381 ancs = self._commonancestorsheads(a, b)
1360 1382 return pycompat.maplist(self.node, ancs)
1361 1383
1362 1384 def _commonancestorsheads(self, *revs):
1363 1385 """calculate all the heads of the common ancestors of revs"""
1364 1386 try:
1365 1387 ancs = self.index.commonancestorsheads(*revs)
1366 1388 except (AttributeError, OverflowError): # C implementation failed
1367 1389 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1368 1390 return ancs
1369 1391
1370 1392 def isancestor(self, a, b):
1371 1393 """return True if node a is an ancestor of node b
1372 1394
1373 1395 A revision is considered an ancestor of itself."""
1374 1396 a, b = self.rev(a), self.rev(b)
1375 1397 return self.isancestorrev(a, b)
1376 1398
1377 1399 def isancestorrev(self, a, b):
1378 1400 """return True if revision a is an ancestor of revision b
1379 1401
1380 1402 A revision is considered an ancestor of itself.
1381 1403
1382 1404 The implementation of this is trivial but the use of
1383 1405 reachableroots is not."""
1384 1406 if a == nullrev:
1385 1407 return True
1386 1408 elif a == b:
1387 1409 return True
1388 1410 elif a > b:
1389 1411 return False
1390 1412 return bool(self.reachableroots(a, [b], [a], includepath=False))
1391 1413
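The `a > b` early return above rests on a revlog invariant: revision numbers are assigned in topological order, so a parent always has a smaller rev than its children. A standalone sketch of an ancestry test exploiting that ordering (toy parent table; the real code delegates to reachableroots instead):

parentrevs = [(-1, -1), (0, -1), (0, -1), (1, 2)]

def toy_isancestorrev(a, b):
    if a == -1 or a == b:
        return True
    if a > b:
        return False        # ancestors never have larger revs
    stack = [b]
    while stack:
        r = stack.pop()
        if r == a:
            return True
        # pruning parents below `a` is safe thanks to the same ordering
        stack.extend(p for p in parentrevs[r] if p >= a)
    return False

print(toy_isancestorrev(0, 3), toy_isancestorrev(3, 1))  # True False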
1392 1414 def reachableroots(self, minroot, heads, roots, includepath=False):
1393 1415 """return (heads(::(<roots> and <roots>::<heads>)))
1394 1416
1395 1417 If includepath is True, return (<roots>::<heads>)."""
1396 1418 try:
1397 1419 return self.index.reachableroots2(
1398 1420 minroot, heads, roots, includepath
1399 1421 )
1400 1422 except AttributeError:
1401 1423 return dagop._reachablerootspure(
1402 1424 self.parentrevs, minroot, roots, heads, includepath
1403 1425 )
1404 1426
1405 1427 def ancestor(self, a, b):
1406 1428 """calculate the "best" common ancestor of nodes a and b"""
1407 1429
1408 1430 a, b = self.rev(a), self.rev(b)
1409 1431 try:
1410 1432 ancs = self.index.ancestors(a, b)
1411 1433 except (AttributeError, OverflowError):
1412 1434 ancs = ancestor.ancestors(self.parentrevs, a, b)
1413 1435 if ancs:
1414 1436 # choose a consistent winner when there's a tie
1415 1437 return min(map(self.node, ancs))
1416 1438 return nullid
1417 1439
1418 1440 def _match(self, id):
1419 1441 if isinstance(id, int):
1420 1442 # rev
1421 1443 return self.node(id)
1422 1444 if len(id) == 20:
1423 1445 # possibly a binary node
1424 1446 # odds of a binary node being all hex in ASCII are 1 in 10**25
1425 1447 try:
1426 1448 node = id
1427 1449 self.rev(node) # quick search the index
1428 1450 return node
1429 1451 except error.LookupError:
1430 1452 pass # may be partial hex id
1431 1453 try:
1432 1454 # str(rev)
1433 1455 rev = int(id)
1434 1456 if b"%d" % rev != id:
1435 1457 raise ValueError
1436 1458 if rev < 0:
1437 1459 rev = len(self) + rev
1438 1460 if rev < 0 or rev >= len(self):
1439 1461 raise ValueError
1440 1462 return self.node(rev)
1441 1463 except (ValueError, OverflowError):
1442 1464 pass
1443 1465 if len(id) == 40:
1444 1466 try:
1445 1467 # a full hex nodeid?
1446 1468 node = bin(id)
1447 1469 self.rev(node)
1448 1470 return node
1449 1471 except (TypeError, error.LookupError):
1450 1472 pass
1451 1473
1452 1474 def _partialmatch(self, id):
1453 1475 # we don't care about wdirfilenodeids as they should always be full hashes
1454 1476 maybewdir = wdirhex.startswith(id)
1455 1477 try:
1456 1478 partial = self.index.partialmatch(id)
1457 1479 if partial and self.hasnode(partial):
1458 1480 if maybewdir:
1459 1481 # single 'ff...' match in radix tree, ambiguous with wdir
1460 1482 raise error.RevlogError
1461 1483 return partial
1462 1484 if maybewdir:
1463 1485 # no 'ff...' match in radix tree, wdir identified
1464 1486 raise error.WdirUnsupported
1465 1487 return None
1466 1488 except error.RevlogError:
1467 1489 # parsers.c radix tree lookup gave multiple matches
1468 1490 # fast path: for unfiltered changelog, radix tree is accurate
1469 1491 if not getattr(self, 'filteredrevs', None):
1470 1492 raise error.AmbiguousPrefixLookupError(
1471 1493 id, self.indexfile, _(b'ambiguous identifier')
1472 1494 )
1473 1495 # fall through to slow path that filters hidden revisions
1474 1496 except (AttributeError, ValueError):
1475 1497 # we are pure python, or key was too short to search radix tree
1476 1498 pass
1477 1499
1478 1500 if id in self._pcache:
1479 1501 return self._pcache[id]
1480 1502
1481 1503 if len(id) <= 40:
1482 1504 try:
1483 1505 # hex(node)[:...]
1484 1506 l = len(id) // 2 # grab an even number of digits
1485 1507 prefix = bin(id[: l * 2])
1486 1508 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1487 1509 nl = [
1488 1510 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1489 1511 ]
1490 1512 if nullhex.startswith(id):
1491 1513 nl.append(nullid)
1492 1514 if len(nl) > 0:
1493 1515 if len(nl) == 1 and not maybewdir:
1494 1516 self._pcache[id] = nl[0]
1495 1517 return nl[0]
1496 1518 raise error.AmbiguousPrefixLookupError(
1497 1519 id, self.indexfile, _(b'ambiguous identifier')
1498 1520 )
1499 1521 if maybewdir:
1500 1522 raise error.WdirUnsupported
1501 1523 return None
1502 1524 except TypeError:
1503 1525 pass
1504 1526
1505 1527 def lookup(self, id):
1506 1528 """locate a node based on:
1507 1529 - revision number or str(revision number)
1508 1530 - nodeid or subset of hex nodeid
1509 1531 """
1510 1532 n = self._match(id)
1511 1533 if n is not None:
1512 1534 return n
1513 1535 n = self._partialmatch(id)
1514 1536 if n:
1515 1537 return n
1516 1538
1517 1539 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1518 1540
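For illustration, the resolution order that `lookup` delegates to `_match` and then `_partialmatch` (revision number, then full node, then hex prefix) can be mimicked against a toy node list; everything below is hypothetical scaffolding, not the revlog API:

from binascii import hexlify

nodes = [bytes([i]) * 20 for i in range(3)]   # position == revision
hexnodes = [hexlify(n) for n in nodes]

def toy_lookup(id):
    try:                                       # 1. str(revision number)
        rev = int(id)
        if b"%d" % rev == id and 0 <= rev < len(nodes):
            return nodes[rev]
    except ValueError:
        pass
    if len(id) == 40 and id in hexnodes:       # 2. full hex nodeid
        return nodes[hexnodes.index(id)]
    matches = [n for n, h in zip(nodes, hexnodes) if h.startswith(id)]
    if len(matches) == 1:                      # 3. unambiguous prefix
        return matches[0]
    raise LookupError(id)

print(toy_lookup(b"1") == nodes[1], toy_lookup(b"0202") == nodes[2])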
1519 1541 def shortest(self, node, minlength=1):
1520 1542 """Find the shortest unambiguous prefix that matches node."""
1521 1543
1522 1544 def isvalid(prefix):
1523 1545 try:
1524 1546 matchednode = self._partialmatch(prefix)
1525 1547 except error.AmbiguousPrefixLookupError:
1526 1548 return False
1527 1549 except error.WdirUnsupported:
1528 1550 # single 'ff...' match
1529 1551 return True
1530 1552 if matchednode is None:
1531 1553 raise error.LookupError(node, self.indexfile, _(b'no node'))
1532 1554 return True
1533 1555
1534 1556 def maybewdir(prefix):
1535 1557 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1536 1558
1537 1559 hexnode = hex(node)
1538 1560
1539 1561 def disambiguate(hexnode, minlength):
1540 1562 """Disambiguate against wdirid."""
1541 1563 for length in range(minlength, len(hexnode) + 1):
1542 1564 prefix = hexnode[:length]
1543 1565 if not maybewdir(prefix):
1544 1566 return prefix
1545 1567
1546 1568 if not getattr(self, 'filteredrevs', None):
1547 1569 try:
1548 1570 length = max(self.index.shortest(node), minlength)
1549 1571 return disambiguate(hexnode, length)
1550 1572 except error.RevlogError:
1551 1573 if node != wdirid:
1552 1574 raise error.LookupError(node, self.indexfile, _(b'no node'))
1553 1575 except AttributeError:
1554 1576 # Fall through to pure code
1555 1577 pass
1556 1578
1557 1579 if node == wdirid:
1558 1580 for length in range(minlength, len(hexnode) + 1):
1559 1581 prefix = hexnode[:length]
1560 1582 if isvalid(prefix):
1561 1583 return prefix
1562 1584
1563 1585 for length in range(minlength, len(hexnode) + 1):
1564 1586 prefix = hexnode[:length]
1565 1587 if isvalid(prefix):
1566 1588 return disambiguate(hexnode, length)
1567 1589
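A standalone sketch of the question `shortest` answers, using a linear scan over a toy id list where the C index would consult its radix tree (wdir disambiguation omitted):

hexnodes = [b'abc123', b'abd456', b'f00baa']

def toy_shortest(hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if [h for h in hexnodes if h.startswith(prefix)] == [hexnode]:
            return prefix
    return hexnode

print(toy_shortest(b'abc123'))  # b'abc' (b'ab' also matches b'abd456')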
1568 1590 def cmp(self, node, text):
1569 1591 """compare text with a given file revision
1570 1592
1571 1593 returns True if text is different from what is stored.
1572 1594 """
1573 1595 p1, p2 = self.parents(node)
1574 1596 return storageutil.hashrevisionsha1(text, p1, p2) != node
1575 1597
1576 1598 def _cachesegment(self, offset, data):
1577 1599 """Add a segment to the revlog cache.
1578 1600
1579 1601 Accepts an absolute offset and the data that is at that location.
1580 1602 """
1581 1603 o, d = self._chunkcache
1582 1604 # try to add to existing cache
1583 1605 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1584 1606 self._chunkcache = o, d + data
1585 1607 else:
1586 1608 self._chunkcache = offset, data
1587 1609
1588 1610 def _readsegment(self, offset, length, df=None):
1589 1611 """Load a segment of raw data from the revlog.
1590 1612
1591 1613 Accepts an absolute offset, length to read, and an optional existing
1592 1614 file handle to read from.
1593 1615
1594 1616 If an existing file handle is passed, it will be seeked and the
1595 1617 original seek position will NOT be restored.
1596 1618
1597 1619 Returns a str or buffer of raw byte data.
1598 1620
1599 1621 Raises if the requested number of bytes could not be read.
1600 1622 """
1601 1623 # Cache data both forward and backward around the requested
1602 1624 # data, in a fixed size window. This helps speed up operations
1603 1625 # involving reading the revlog backwards.
1604 1626 cachesize = self._chunkcachesize
1605 1627 realoffset = offset & ~(cachesize - 1)
1606 1628 reallength = (
1607 1629 (offset + length + cachesize) & ~(cachesize - 1)
1608 1630 ) - realoffset
1609 1631 with self._datareadfp(df) as df:
1610 1632 df.seek(realoffset)
1611 1633 d = df.read(reallength)
1612 1634
1613 1635 self._cachesegment(realoffset, d)
1614 1636 if offset != realoffset or reallength != length:
1615 1637 startoffset = offset - realoffset
1616 1638 if len(d) - startoffset < length:
1617 1639 raise error.RevlogError(
1618 1640 _(
1619 1641 b'partial read of revlog %s; expected %d bytes from '
1620 1642 b'offset %d, got %d'
1621 1643 )
1622 1644 % (
1623 1645 self.indexfile if self._inline else self.datafile,
1624 1646 length,
1625 1647 realoffset,
1626 1648 len(d) - startoffset,
1627 1649 )
1628 1650 )
1629 1651
1630 1652 return util.buffer(d, startoffset, length)
1631 1653
1632 1654 if len(d) < length:
1633 1655 raise error.RevlogError(
1634 1656 _(
1635 1657 b'partial read of revlog %s; expected %d bytes from offset '
1636 1658 b'%d, got %d'
1637 1659 )
1638 1660 % (
1639 1661 self.indexfile if self._inline else self.datafile,
1640 1662 length,
1641 1663 offset,
1642 1664 len(d),
1643 1665 )
1644 1666 )
1645 1667
1646 1668 return d
1647 1669
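The window rounding in `_readsegment` is plain power-of-two alignment. A worked instance, assuming a 65536-byte window (the real value comes from `_chunkcachesize`):

cachesize = 65536
offset, length = 70000, 1000

realoffset = offset & ~(cachesize - 1)             # round down -> 65536
reallength = ((offset + length + cachesize)
              & ~(cachesize - 1)) - realoffset     # padded up  -> 65536
startoffset = offset - realoffset                  # 4464 into the window
assert (realoffset, reallength, startoffset) == (65536, 65536, 4464)
# the cached window [65536, 131072) fully covers [70000, 71000)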
1648 1670 def _getsegment(self, offset, length, df=None):
1649 1671 """Obtain a segment of raw data from the revlog.
1650 1672
1651 1673 Accepts an absolute offset, length of bytes to obtain, and an
1652 1674 optional file handle to the already-opened revlog. If the file
1653 1675 handle is used, its original seek position will not be preserved.
1654 1676
1655 1677 Requests for data may be returned from a cache.
1656 1678
1657 1679 Returns a str or a buffer instance of raw byte data.
1658 1680 """
1659 1681 o, d = self._chunkcache
1660 1682 l = len(d)
1661 1683
1662 1684 # is it in the cache?
1663 1685 cachestart = offset - o
1664 1686 cacheend = cachestart + length
1665 1687 if cachestart >= 0 and cacheend <= l:
1666 1688 if cachestart == 0 and cacheend == l:
1667 1689 return d # avoid a copy
1668 1690 return util.buffer(d, cachestart, cacheend - cachestart)
1669 1691
1670 1692 return self._readsegment(offset, length, df=df)
1671 1693
1672 1694 def _getsegmentforrevs(self, startrev, endrev, df=None):
1673 1695 """Obtain a segment of raw data corresponding to a range of revisions.
1674 1696
1675 1697 Accepts the start and end revisions and an optional already-open
1677 1699 file handle to be used for reading. If the file handle is used, its
1677 1699 seek position will not be preserved.
1678 1700
1679 1701 Requests for data may be satisfied by a cache.
1680 1702
1681 1703 Returns a 2-tuple of (offset, data) for the requested range of
1682 1704 revisions. Offset is the integer offset from the beginning of the
1683 1705 revlog and data is a str or buffer of the raw byte data.
1684 1706
1685 1707 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1686 1708 to determine where each revision's data begins and ends.
1687 1709 """
1688 1710 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1689 1711 # (functions are expensive).
1690 1712 index = self.index
1691 1713 istart = index[startrev]
1692 1714 start = int(istart[0] >> 16)
1693 1715 if startrev == endrev:
1694 1716 end = start + istart[1]
1695 1717 else:
1696 1718 iend = index[endrev]
1697 1719 end = int(iend[0] >> 16) + iend[1]
1698 1720
1699 1721 if self._inline:
1700 1722 start += (startrev + 1) * self._io.size
1701 1723 end += (endrev + 1) * self._io.size
1702 1724 length = end - start
1703 1725
1704 1726 return start, self._getsegment(start, length, df=df)
1705 1727
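The inline adjustment above reflects the .i layout: in an inline revlog, index entries and data chunks alternate, so reaching revision N's chunk means skipping N + 1 index entries. A worked instance with an assumed 64-byte entry size (`self._io.size`) and toy chunk sizes:

iosize = 64
chunklen = [10, 20, 30]                # compressed chunk size per rev

def inline_start(rev):
    dataoffset = sum(chunklen[:rev])   # offset as if in a .d file
    return dataoffset + (rev + 1) * iosize

# rev 2's chunk sits after three index entries and the first two chunks
assert inline_start(2) == 10 + 20 + 3 * 64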
1706 1728 def _chunk(self, rev, df=None):
1707 1729 """Obtain a single decompressed chunk for a revision.
1708 1730
1709 1731 Accepts an integer revision and an optional already-open file handle
1710 1732 to be used for reading. If used, the seek position of the file will not
1711 1733 be preserved.
1712 1734
1713 1735 Returns a str holding uncompressed data for the requested revision.
1714 1736 """
1715 1737 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1716 1738
1717 1739 def _chunks(self, revs, df=None, targetsize=None):
1718 1740 """Obtain decompressed chunks for the specified revisions.
1719 1741
1720 1742 Accepts an iterable of numeric revisions that are assumed to be in
1721 1743 ascending order. Also accepts an optional already-open file handle
1722 1744 to be used for reading. If used, the seek position of the file will
1723 1745 not be preserved.
1724 1746
1725 1747 This function is similar to calling ``self._chunk()`` multiple times,
1726 1748 but is faster.
1727 1749
1728 1750 Returns a list with decompressed data for each requested revision.
1729 1751 """
1730 1752 if not revs:
1731 1753 return []
1732 1754 start = self.start
1733 1755 length = self.length
1734 1756 inline = self._inline
1735 1757 iosize = self._io.size
1736 1758 buffer = util.buffer
1737 1759
1738 1760 l = []
1739 1761 ladd = l.append
1740 1762
1741 1763 if not self._withsparseread:
1742 1764 slicedchunks = (revs,)
1743 1765 else:
1744 1766 slicedchunks = deltautil.slicechunk(
1745 1767 self, revs, targetsize=targetsize
1746 1768 )
1747 1769
1748 1770 for revschunk in slicedchunks:
1749 1771 firstrev = revschunk[0]
1750 1772 # Skip trailing revisions with empty diff
1751 1773 for lastrev in revschunk[::-1]:
1752 1774 if length(lastrev) != 0:
1753 1775 break
1754 1776
1755 1777 try:
1756 1778 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1757 1779 except OverflowError:
1758 1780 # issue4215 - we can't cache a run of chunks greater than
1759 1781 # 2G on Windows
1760 1782 return [self._chunk(rev, df=df) for rev in revschunk]
1761 1783
1762 1784 decomp = self.decompress
1763 1785 for rev in revschunk:
1764 1786 chunkstart = start(rev)
1765 1787 if inline:
1766 1788 chunkstart += (rev + 1) * iosize
1767 1789 chunklength = length(rev)
1768 1790 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1769 1791
1770 1792 return l
1771 1793
1772 1794 def _chunkclear(self):
1773 1795 """Clear the raw chunk cache."""
1774 1796 self._chunkcache = (0, b'')
1775 1797
1776 1798 def deltaparent(self, rev):
1777 1799 """return deltaparent of the given revision"""
1778 1800 base = self.index[rev][3]
1779 1801 if base == rev:
1780 1802 return nullrev
1781 1803 elif self._generaldelta:
1782 1804 return base
1783 1805 else:
1784 1806 return rev - 1
1785 1807
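A standalone sketch of the convention `deltaparent` encodes: in a generaldelta revlog the delta base is stored per entry (`index[rev][3]`), and `base == rev` marks a full snapshot that terminates the chain. Toy data only:

base = [0, 0, 1, 1, 3]          # toy index[rev][3] values

def toy_deltachain(rev):
    chain = []
    while True:
        chain.append(rev)
        if base[rev] == rev:    # full text stored here, chain ends
            break
        rev = base[rev]         # generaldelta: follow the stored base
    chain.reverse()             # oldest first, like _deltachain()
    return chain

assert toy_deltachain(4) == [0, 1, 3, 4]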
1786 1808 def issnapshot(self, rev):
1787 1809 """tells whether rev is a snapshot"""
1788 1810 if not self._sparserevlog:
1789 1811 return self.deltaparent(rev) == nullrev
1790 1812 elif util.safehasattr(self.index, 'issnapshot'):
1791 1813 # directly assign the method to cache the capability test and access
1792 1814 self.issnapshot = self.index.issnapshot
1793 1815 return self.issnapshot(rev)
1794 1816 if rev == nullrev:
1795 1817 return True
1796 1818 entry = self.index[rev]
1797 1819 base = entry[3]
1798 1820 if base == rev:
1799 1821 return True
1800 1822 if base == nullrev:
1801 1823 return True
1802 1824 p1 = entry[5]
1803 1825 p2 = entry[6]
1804 1826 if base == p1 or base == p2:
1805 1827 return False
1806 1828 return self.issnapshot(base)
1807 1829
1808 1830 def snapshotdepth(self, rev):
1809 1831 """number of snapshot in the chain before this one"""
1810 1832 if not self.issnapshot(rev):
1811 1833 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1812 1834 return len(self._deltachain(rev)[0]) - 1
1813 1835
1814 1836 def revdiff(self, rev1, rev2):
1815 1837 """return or calculate a delta between two revisions
1816 1838
1817 1839 The delta calculated is in binary form and is intended to be written to
1818 1840 revlog data directly. So this function needs raw revision data.
1819 1841 """
1820 1842 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1821 1843 return bytes(self._chunk(rev2))
1822 1844
1823 1845 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1824 1846
1825 1847 def _processflags(self, text, flags, operation, raw=False):
1826 1848 """deprecated entry point to access flag processors"""
1827 1849 msg = b'_processflags(...) is deprecated, use the specialized variant'
1828 1850 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1829 1851 if raw:
1830 1852 return text, flagutil.processflagsraw(self, text, flags)
1831 1853 elif operation == b'read':
1832 1854 return flagutil.processflagsread(self, text, flags)
1833 1855 else: # write operation
1834 1856 return flagutil.processflagswrite(self, text, flags, None)
1835 1857
1836 1858 def revision(self, nodeorrev, _df=None, raw=False):
1837 1859 """return an uncompressed revision of a given node or revision
1838 1860 number.
1839 1861
1840 1862 _df - an existing file handle to read from. (internal-only)
1841 1863 raw - an optional argument specifying if the revision data is to be
1842 1864 treated as raw data when applying flag transforms. 'raw' should be set
1843 1865 to True when generating changegroups or in debug commands.
1844 1866 """
1845 1867 if raw:
1846 1868 msg = (
1847 1869 b'revlog.revision(..., raw=True) is deprecated, '
1848 1870 b'use revlog.rawdata(...)'
1849 1871 )
1850 1872 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1851 1873 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1852 1874
1853 1875 def sidedata(self, nodeorrev, _df=None):
1854 1876 """a map of extra data related to the changeset but not part of the hash
1855 1877
1856 1878 This function currently returns a dictionary. However, a more
1857 1879 advanced mapping object will likely be used in the future for
1858 1880 more efficient/lazy code.
1859 1881 """
1860 1882 return self._revisiondata(nodeorrev, _df)[1]
1861 1883
1862 1884 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1863 1885 # deal with <nodeorrev> argument type
1864 1886 if isinstance(nodeorrev, int):
1865 1887 rev = nodeorrev
1866 1888 node = self.node(rev)
1867 1889 else:
1868 1890 node = nodeorrev
1869 1891 rev = None
1870 1892
1871 1893 # fast path the special `nullid` rev
1872 1894 if node == nullid:
1873 1895 return b"", {}
1874 1896
1875 1897 # ``rawtext`` is the text as stored inside the revlog. Might be the
1876 1898 # revision or might need to be processed to retrieve the revision.
1877 1899 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1878 1900
1879 1901 if raw and validated:
1880 1902 # if we don't want to process the raw text and the raw
1881 1903 # text is cached, we can exit early.
1882 1904 return rawtext, {}
1883 1905 if rev is None:
1884 1906 rev = self.rev(node)
1885 1907 # the revlog's flags for this revision
1886 1908 # (they usually alter its state or content)
1887 1909 flags = self.flags(rev)
1888 1910
1889 1911 if validated and flags == REVIDX_DEFAULT_FLAGS:
1890 1912 # no extra flags set, no flag processor runs, text = rawtext
1891 1913 return rawtext, {}
1892 1914
1893 1915 sidedata = {}
1894 1916 if raw:
1895 1917 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1896 1918 text = rawtext
1897 1919 else:
1898 1920 try:
1899 1921 r = flagutil.processflagsread(self, rawtext, flags)
1900 1922 except error.SidedataHashError as exc:
1901 1923 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1902 1924 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1903 1925 raise error.RevlogError(msg)
1904 1926 text, validatehash, sidedata = r
1905 1927 if validatehash:
1906 1928 self.checkhash(text, node, rev=rev)
1907 1929 if not validated:
1908 1930 self._revisioncache = (node, rev, rawtext)
1909 1931
1910 1932 return text, sidedata
1911 1933
1912 1934 def _rawtext(self, node, rev, _df=None):
1913 1935 """return the possibly unvalidated rawtext for a revision
1914 1936
1915 1937 returns (rev, rawtext, validated)
1916 1938 """
1917 1939
1918 1940 # revision in the cache (could be useful to apply delta)
1919 1941 cachedrev = None
1920 1942 # An intermediate text to apply deltas to
1921 1943 basetext = None
1922 1944
1923 1945 # Check if we have the entry in cache
1924 1946 # The cache entry looks like (node, rev, rawtext)
1925 1947 if self._revisioncache:
1926 1948 if self._revisioncache[0] == node:
1927 1949 return (rev, self._revisioncache[2], True)
1928 1950 cachedrev = self._revisioncache[1]
1929 1951
1930 1952 if rev is None:
1931 1953 rev = self.rev(node)
1932 1954
1933 1955 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1934 1956 if stopped:
1935 1957 basetext = self._revisioncache[2]
1936 1958
1937 1959 # drop cache to save memory, the caller is expected to
1938 1960 # update self._revisioncache after validating the text
1939 1961 self._revisioncache = None
1940 1962
1941 1963 targetsize = None
1942 1964 rawsize = self.index[rev][2]
1943 1965 if 0 <= rawsize:
1944 1966 targetsize = 4 * rawsize
1945 1967
1946 1968 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1947 1969 if basetext is None:
1948 1970 basetext = bytes(bins[0])
1949 1971 bins = bins[1:]
1950 1972
1951 1973 rawtext = mdiff.patches(basetext, bins)
1952 1974 del basetext # let us have a chance to free memory early
1953 1975 return (rev, rawtext, False)
1954 1976
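Conceptually the reconstruction above is "base text plus a pile of binary patches". A toy stand-in for `mdiff.patches`; a real mdiff delta is a packed series of (start, end, data) fragments, modelled here as one fragment per delta:

def apply_delta(text, delta):
    start, end, data = delta
    return text[:start] + data + text[end:]

basetext = b'the quick brown fox'
bins = [(4, 9, b'slow'), (0, 3, b'a')]      # deltas, oldest first
rawtext = basetext
for d in bins:
    rawtext = apply_delta(rawtext, d)
assert rawtext == b'a slow brown fox'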
1955 1977 def rawdata(self, nodeorrev, _df=None):
1956 1978 """return an uncompressed raw data of a given node or revision number.
1957 1979
1958 1980 _df - an existing file handle to read from. (internal-only)
1959 1981 """
1960 1982 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1961 1983
1962 1984 def hash(self, text, p1, p2):
1963 1985 """Compute a node hash.
1964 1986
1965 1987 Available as a function so that subclasses can replace the hash
1966 1988 as needed.
1967 1989 """
1968 1990 return storageutil.hashrevisionsha1(text, p1, p2)
1969 1991
1970 1992 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1971 1993 """Check node hash integrity.
1972 1994
1973 1995 Available as a function so that subclasses can extend hash mismatch
1974 1996 behaviors as needed.
1975 1997 """
1976 1998 try:
1977 1999 if p1 is None and p2 is None:
1978 2000 p1, p2 = self.parents(node)
1979 2001 if node != self.hash(text, p1, p2):
1980 2002 # Clear the revision cache on hash failure. The revision cache
1981 2003 # only stores the raw revision and clearing the cache does have
1982 2004 # the side-effect that we won't have a cache hit when the raw
1983 2005 # revision data is accessed. But this case should be rare and
1984 2006 # it is extra work to teach the cache about the hash
1985 2007 # verification state.
1986 2008 if self._revisioncache and self._revisioncache[0] == node:
1987 2009 self._revisioncache = None
1988 2010
1989 2011 revornode = rev
1990 2012 if revornode is None:
1991 2013 revornode = templatefilters.short(hex(node))
1992 2014 raise error.RevlogError(
1993 2015 _(b"integrity check failed on %s:%s")
1994 2016 % (self.indexfile, pycompat.bytestr(revornode))
1995 2017 )
1996 2018 except error.RevlogError:
1997 2019 if self._censorable and storageutil.iscensoredtext(text):
1998 2020 raise error.CensoredNodeError(self.indexfile, node, text)
1999 2021 raise
2000 2022
2001 2023 def _enforceinlinesize(self, tr, fp=None):
2002 2024 """Check if the revlog is too big for inline and convert if so.
2003 2025
2004 2026 This should be called after revisions are added to the revlog. If the
2005 2027 revlog has grown too large to be an inline revlog, it will convert it
2006 2028 to use multiple index and data files.
2007 2029 """
2008 2030 tiprev = len(self) - 1
2009 2031 if (
2010 2032 not self._inline
2011 2033 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2012 2034 ):
2013 2035 return
2014 2036
2015 2037 troffset = tr.findoffset(self.indexfile)
2016 2038 if troffset is None:
2017 2039 raise error.RevlogError(
2018 2040 _(b"%s not found in the transaction") % self.indexfile
2019 2041 )
2020 2042 trindex = 0
2021 2043 tr.add(self.datafile, 0)
2022 2044
2023 2045 if fp:
2024 2046 fp.flush()
2025 2047 fp.close()
2026 2048 # We can't use the cached file handle after close(). So prevent
2027 2049 # its usage.
2028 2050 self._writinghandles = None
2029 2051
2030 2052 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2031 2053 for r in self:
2032 2054 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2033 2055 if troffset <= self.start(r):
2034 2056 trindex = r
2035 2057
2036 2058 with self._indexfp(b'w') as fp:
2037 2059 self.version &= ~FLAG_INLINE_DATA
2038 2060 self._inline = False
2039 2061 io = self._io
2040 2062 for i in self:
2041 2063 e = io.packentry(self.index[i], self.node, self.version, i)
2042 2064 fp.write(e)
2043 2065
2044 2066 # the temp file replaces the real index when we exit the context
2045 2067 # manager
2046 2068
2047 2069 tr.replace(self.indexfile, trindex * self._io.size)
2048 2070 nodemaputil.setup_persistent_nodemap(tr, self)
2049 2071 self._chunkclear()
2050 2072
2051 2073 def _nodeduplicatecallback(self, transaction, node):
2052 2074 """called when trying to add a node already stored."""
2053 2075
2054 2076 def addrevision(
2055 2077 self,
2056 2078 text,
2057 2079 transaction,
2058 2080 link,
2059 2081 p1,
2060 2082 p2,
2061 2083 cachedelta=None,
2062 2084 node=None,
2063 2085 flags=REVIDX_DEFAULT_FLAGS,
2064 2086 deltacomputer=None,
2065 2087 sidedata=None,
2066 2088 ):
2067 2089 """add a revision to the log
2068 2090
2069 2091 text - the revision data to add
2070 2092 transaction - the transaction object used for rollback
2071 2093 link - the linkrev data to add
2072 2094 p1, p2 - the parent nodeids of the revision
2073 2095 cachedelta - an optional precomputed delta
2074 2096 node - nodeid of revision; typically node is not specified, and it is
2075 2097 computed by default as hash(text, p1, p2), however subclasses might
2076 2098 use a different hashing method (and override checkhash() in that case)
2077 2099 flags - the known flags to set on the revision
2078 2100 deltacomputer - an optional deltacomputer instance shared between
2079 2101 multiple calls
2080 2102 """
2081 2103 if link == nullrev:
2082 2104 raise error.RevlogError(
2083 2105 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2084 2106 )
2085 2107
2086 2108 if sidedata is None:
2087 2109 sidedata = {}
2088 2110 flags = flags & ~REVIDX_SIDEDATA
2089 2111 elif not self.hassidedata:
2090 2112 raise error.ProgrammingError(
2091 2113 _(b"trying to add sidedata to a revlog who don't support them")
2092 2114 )
2093 2115 else:
2094 2116 flags |= REVIDX_SIDEDATA
2095 2117
2096 2118 if flags:
2097 2119 node = node or self.hash(text, p1, p2)
2098 2120
2099 2121 rawtext, validatehash = flagutil.processflagswrite(
2100 2122 self, text, flags, sidedata=sidedata
2101 2123 )
2102 2124
2103 2125 # If the flag processor modifies the revision data, ignore any provided
2104 2126 # cachedelta.
2105 2127 if rawtext != text:
2106 2128 cachedelta = None
2107 2129
2108 2130 if len(rawtext) > _maxentrysize:
2109 2131 raise error.RevlogError(
2110 2132 _(
2111 2133 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2112 2134 )
2113 2135 % (self.indexfile, len(rawtext))
2114 2136 )
2115 2137
2116 2138 node = node or self.hash(rawtext, p1, p2)
2117 2139 rev = self.index.get_rev(node)
2118 2140 if rev is not None:
2119 2141 return rev
2120 2142
2121 2143 if validatehash:
2122 2144 self.checkhash(rawtext, node, p1=p1, p2=p2)
2123 2145
2124 2146 return self.addrawrevision(
2125 2147 rawtext,
2126 2148 transaction,
2127 2149 link,
2128 2150 p1,
2129 2151 p2,
2130 2152 node,
2131 2153 flags,
2132 2154 cachedelta=cachedelta,
2133 2155 deltacomputer=deltacomputer,
2134 2156 )
2135 2157
2136 2158 def addrawrevision(
2137 2159 self,
2138 2160 rawtext,
2139 2161 transaction,
2140 2162 link,
2141 2163 p1,
2142 2164 p2,
2143 2165 node,
2144 2166 flags,
2145 2167 cachedelta=None,
2146 2168 deltacomputer=None,
2147 2169 ):
2148 2170 """add a raw revision with known flags, node and parents
2149 2171 useful when reusing a revision not stored in this revlog (ex: received
2150 2172 over the wire, or read from an external bundle).
2151 2173 """
2152 2174 dfh = None
2153 2175 if not self._inline:
2154 2176 dfh = self._datafp(b"a+")
2155 2177 ifh = self._indexfp(b"a+")
2156 2178 try:
2157 2179 return self._addrevision(
2158 2180 node,
2159 2181 rawtext,
2160 2182 transaction,
2161 2183 link,
2162 2184 p1,
2163 2185 p2,
2164 2186 flags,
2165 2187 cachedelta,
2166 2188 ifh,
2167 2189 dfh,
2168 2190 deltacomputer=deltacomputer,
2169 2191 )
2170 2192 finally:
2171 2193 if dfh:
2172 2194 dfh.close()
2173 2195 ifh.close()
2174 2196
2175 2197 def compress(self, data):
2176 2198 """Generate a possibly-compressed representation of data."""
2177 2199 if not data:
2178 2200 return b'', data
2179 2201
2180 2202 compressed = self._compressor.compress(data)
2181 2203
2182 2204 if compressed:
2183 2205 # The revlog compressor added the header in the returned data.
2184 2206 return b'', compressed
2185 2207
2186 2208 if data[0:1] == b'\0':
2187 2209 return b'', data
2188 2210 return b'u', data
2189 2211
2190 2212 def decompress(self, data):
2191 2213 """Decompress a revlog chunk.
2192 2214
2193 2215 The chunk is expected to begin with a header identifying the
2194 2216 format type so it can be routed to an appropriate decompressor.
2195 2217 """
2196 2218 if not data:
2197 2219 return data
2198 2220
2199 2221 # Revlogs are read much more frequently than they are written and many
2200 2222 # chunks only take microseconds to decompress, so performance is
2201 2223 # important here.
2202 2224 #
2203 2225 # We can make a few assumptions about revlogs:
2204 2226 #
2205 2227 # 1) the majority of chunks will be compressed (as opposed to inline
2206 2228 # raw data).
2207 2229 # 2) decompressing *any* data will likely be at least 10x slower than
2208 2230 # returning raw inline data.
2209 2231 # 3) we want to prioritize common and officially supported compression
2210 2232 # engines
2211 2233 #
2212 2234 # It follows that we want to optimize for "decompress compressed data
2213 2235 # when encoded with common and officially supported compression engines"
2214 2236 # case over "raw data" and "data encoded by less common or non-official
2215 2237 # compression engines." That is why we have the inline lookup first
2216 2238 # followed by the compengines lookup.
2217 2239 #
2218 2240 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2219 2241 # compressed chunks. And this matters for changelog and manifest reads.
2220 2242 t = data[0:1]
2221 2243
2222 2244 if t == b'x':
2223 2245 try:
2224 2246 return _zlibdecompress(data)
2225 2247 except zlib.error as e:
2226 2248 raise error.RevlogError(
2227 2249 _(b'revlog decompress error: %s')
2228 2250 % stringutil.forcebytestr(e)
2229 2251 )
2230 2252 # '\0' is more common than 'u' so it goes first.
2231 2253 elif t == b'\0':
2232 2254 return data
2233 2255 elif t == b'u':
2234 2256 return util.buffer(data, 1)
2235 2257
2236 2258 try:
2237 2259 compressor = self._decompressors[t]
2238 2260 except KeyError:
2239 2261 try:
2240 2262 engine = util.compengines.forrevlogheader(t)
2241 2263 compressor = engine.revlogcompressor(self._compengineopts)
2242 2264 self._decompressors[t] = compressor
2243 2265 except KeyError:
2244 2266 raise error.RevlogError(_(b'unknown compression type %r') % t)
2245 2267
2246 2268 return compressor.decompress(data)
2247 2269
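The single-byte header convention shared by `compress` and `decompress` round-trips as sketched below. This is zlib only, and simplified: the real `compress` returns the header and payload separately, and other engines register their own header bytes.

import zlib

def toy_compress(data):
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp               # b'x...': zlib is self-identifying
    if data[:1] == b'\0':
        return data               # b'\0...': stored as-is, unambiguous
    return b'u' + data            # b'u...': explicit "uncompressed"

def toy_decompress(data):
    t = data[:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'u':
        return data[1:]
    return data                   # b'\0' (or empty): raw data

blob = b'na' * 100
assert toy_decompress(toy_compress(blob)) == blob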
2248 2270 def _addrevision(
2249 2271 self,
2250 2272 node,
2251 2273 rawtext,
2252 2274 transaction,
2253 2275 link,
2254 2276 p1,
2255 2277 p2,
2256 2278 flags,
2257 2279 cachedelta,
2258 2280 ifh,
2259 2281 dfh,
2260 2282 alwayscache=False,
2261 2283 deltacomputer=None,
2262 2284 ):
2263 2285 """internal function to add revisions to the log
2264 2286
2265 2287 see addrevision for argument descriptions.
2266 2288
2267 2289 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2268 2290
2269 2291 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2270 2292 be used.
2271 2293
2272 2294 invariants:
2273 2295 - rawtext is optional (can be None); if not set, cachedelta must be set.
2274 2296 if both are set, they must correspond to each other.
2275 2297 """
2276 2298 if node == nullid:
2277 2299 raise error.RevlogError(
2278 2300 _(b"%s: attempt to add null revision") % self.indexfile
2279 2301 )
2280 2302 if node == wdirid or node in wdirfilenodeids:
2281 2303 raise error.RevlogError(
2282 2304 _(b"%s: attempt to add wdir revision") % self.indexfile
2283 2305 )
2284 2306
2285 2307 if self._inline:
2286 2308 fh = ifh
2287 2309 else:
2288 2310 fh = dfh
2289 2311
2290 2312 btext = [rawtext]
2291 2313
2292 2314 curr = len(self)
2293 2315 prev = curr - 1
2294 2316 offset = self.end(prev)
2295 2317
2296 2318 if self._concurrencychecker:
2297 2319 if self._inline:
2298 2320 # offset is "as if" it were in the .d file, so we need to add on
2299 2321 # the size of the entry metadata.
2300 2322 self._concurrencychecker(
2301 2323 ifh, self.indexfile, offset + curr * self._io.size
2302 2324 )
2303 2325 else:
2304 2326 # Entries in the .i are a consistent size.
2305 2327 self._concurrencychecker(
2306 2328 ifh, self.indexfile, curr * self._io.size
2307 2329 )
2308 2330 self._concurrencychecker(dfh, self.datafile, offset)
2309 2331
2310 2332 p1r, p2r = self.rev(p1), self.rev(p2)
2311 2333
2312 2334 # full versions are inserted when the needed deltas
2313 2335 # become comparable to the uncompressed text
2314 2336 if rawtext is None:
2315 2337 # need the rawtext size before it was changed by flag processors,
2316 2338 # i.e. the non-raw size. use revlog explicitly to avoid filelog's
2317 2339 # extra logic that might remove metadata size.
2318 2340 textlen = mdiff.patchedsize(
2319 2341 revlog.size(self, cachedelta[0]), cachedelta[1]
2320 2342 )
2321 2343 else:
2322 2344 textlen = len(rawtext)
2323 2345
2324 2346 if deltacomputer is None:
2325 2347 deltacomputer = deltautil.deltacomputer(self)
2326 2348
2327 2349 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2328 2350
2329 2351 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2330 2352
2331 2353 e = (
2332 2354 offset_type(offset, flags),
2333 2355 deltainfo.deltalen,
2334 2356 textlen,
2335 2357 deltainfo.base,
2336 2358 link,
2337 2359 p1r,
2338 2360 p2r,
2339 2361 node,
2362 0,
2363 0,
2340 2364 )
2365
2366 if self.version & 0xFFFF != REVLOGV2:
2367 e = e[:8]
2368
2341 2369 self.index.append(e)
2342 2370
2343 2371 entry = self._io.packentry(e, self.node, self.version, curr)
2344 2372 self._writeentry(
2345 2373 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2346 2374 )
2347 2375
2348 2376 rawtext = btext[0]
2349 2377
2350 2378 if alwayscache and rawtext is None:
2351 2379 rawtext = deltacomputer.buildtext(revinfo, fh)
2352 2380
2353 2381 if type(rawtext) == bytes: # only accept immutable objects
2354 2382 self._revisioncache = (node, curr, rawtext)
2355 2383 self._chainbasecache[curr] = deltainfo.chainbase
2356 2384 return curr
2357 2385
2358 2386 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2359 2387 # Files opened in a+ mode have inconsistent behavior on various
2360 2388 # platforms. Windows requires that a file positioning call be made
2361 2389 # when the file handle transitions between reads and writes. See
2362 2390 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2363 2391 # platforms, Python or the platform itself can be buggy. Some versions
2364 2392 # of Solaris have been observed to not append at the end of the file
2365 2393 # if the file was seeked to before the end. See issue4943 for more.
2366 2394 #
2367 2395 # We work around this issue by inserting a seek() before writing.
2368 2396 # Note: This is likely not necessary on Python 3. However, because
2369 2397 # the file handle is reused for reads and may be seeked there, we need
2370 2398 # to be careful before changing this.
2371 2399 ifh.seek(0, os.SEEK_END)
2372 2400 if dfh:
2373 2401 dfh.seek(0, os.SEEK_END)
2374 2402
2375 2403 curr = len(self) - 1
2376 2404 if not self._inline:
2377 2405 transaction.add(self.datafile, offset)
2378 2406 transaction.add(self.indexfile, curr * len(entry))
2379 2407 if data[0]:
2380 2408 dfh.write(data[0])
2381 2409 dfh.write(data[1])
2382 2410 ifh.write(entry)
2383 2411 else:
2384 2412 offset += curr * self._io.size
2385 2413 transaction.add(self.indexfile, offset)
2386 2414 ifh.write(entry)
2387 2415 ifh.write(data[0])
2388 2416 ifh.write(data[1])
2389 2417 self._enforceinlinesize(transaction, ifh)
2390 2418 nodemaputil.setup_persistent_nodemap(transaction, self)
2391 2419
2392 2420 def addgroup(
2393 2421 self,
2394 2422 deltas,
2395 2423 linkmapper,
2396 2424 transaction,
2397 2425 alwayscache=False,
2398 2426 addrevisioncb=None,
2399 2427 duplicaterevisioncb=None,
2400 2428 ):
2401 2429 """
2402 2430 add a delta group
2403 2431
2404 2432 given a set of deltas, add them to the revision log. the
2405 2433 first delta is against its parent, which should be in our
2406 2434 log, the rest are against the previous delta.
2407 2435
2408 2436 If ``addrevisioncb`` is defined, it will be called with arguments of
2409 2437 this revlog and the revision that was added.
2410 2438 """
2411 2439
2412 2440 if self._writinghandles:
2413 2441 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2414 2442
2415 2443 r = len(self)
2416 2444 end = 0
2417 2445 if r:
2418 2446 end = self.end(r - 1)
2419 2447 ifh = self._indexfp(b"a+")
2420 2448 isize = r * self._io.size
2421 2449 if self._inline:
2422 2450 transaction.add(self.indexfile, end + isize)
2423 2451 dfh = None
2424 2452 else:
2425 2453 transaction.add(self.indexfile, isize)
2426 2454 transaction.add(self.datafile, end)
2427 2455 dfh = self._datafp(b"a+")
2428 2456
2429 2457 def flush():
2430 2458 if dfh:
2431 2459 dfh.flush()
2432 2460 ifh.flush()
2433 2461
2434 2462 self._writinghandles = (ifh, dfh)
2435 2463 empty = True
2436 2464
2437 2465 try:
2438 2466 deltacomputer = deltautil.deltacomputer(self)
2439 2467 # loop through our set of deltas
2440 2468 for data in deltas:
2441 2469 node, p1, p2, linknode, deltabase, delta, flags = data
2442 2470 link = linkmapper(linknode)
2443 2471 flags = flags or REVIDX_DEFAULT_FLAGS
2444 2472
2445 2473 rev = self.index.get_rev(node)
2446 2474 if rev is not None:
2447 2475 # this can happen if two branches make the same change
2448 2476 self._nodeduplicatecallback(transaction, rev)
2449 2477 if duplicaterevisioncb:
2450 2478 duplicaterevisioncb(self, rev)
2451 2479 empty = False
2452 2480 continue
2453 2481
2454 2482 for p in (p1, p2):
2455 2483 if not self.index.has_node(p):
2456 2484 raise error.LookupError(
2457 2485 p, self.indexfile, _(b'unknown parent')
2458 2486 )
2459 2487
2460 2488 if not self.index.has_node(deltabase):
2461 2489 raise error.LookupError(
2462 2490 deltabase, self.indexfile, _(b'unknown delta base')
2463 2491 )
2464 2492
2465 2493 baserev = self.rev(deltabase)
2466 2494
2467 2495 if baserev != nullrev and self.iscensored(baserev):
2468 2496 # if base is censored, delta must be full replacement in a
2469 2497 # single patch operation
2470 2498 hlen = struct.calcsize(b">lll")
2471 2499 oldlen = self.rawsize(baserev)
2472 2500 newlen = len(delta) - hlen
2473 2501 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2474 2502 raise error.CensoredBaseError(
2475 2503 self.indexfile, self.node(baserev)
2476 2504 )
2477 2505
2478 2506 if not flags and self._peek_iscensored(baserev, delta, flush):
2479 2507 flags |= REVIDX_ISCENSORED
2480 2508
2481 2509 # We assume consumers of addrevisioncb will want to retrieve
2482 2510 # the added revision, which will require a call to
2483 2511 # revision(). revision() will fast path if there is a cache
2484 2512 # hit. So, we tell _addrevision() to always cache in this case.
2485 2513 # We're only using addgroup() in the context of changegroup
2486 2514 # generation so the revision data can always be handled as raw
2487 2515 # by the flagprocessor.
2488 2516 rev = self._addrevision(
2489 2517 node,
2490 2518 None,
2491 2519 transaction,
2492 2520 link,
2493 2521 p1,
2494 2522 p2,
2495 2523 flags,
2496 2524 (baserev, delta),
2497 2525 ifh,
2498 2526 dfh,
2499 2527 alwayscache=alwayscache,
2500 2528 deltacomputer=deltacomputer,
2501 2529 )
2502 2530
2503 2531 if addrevisioncb:
2504 2532 addrevisioncb(self, rev)
2505 2533 empty = False
2506 2534
2507 2535 if not dfh and not self._inline:
2508 2536 # addrevision switched from inline to conventional
2509 2537 # reopen the index
2510 2538 ifh.close()
2511 2539 dfh = self._datafp(b"a+")
2512 2540 ifh = self._indexfp(b"a+")
2513 2541 self._writinghandles = (ifh, dfh)
2514 2542 finally:
2515 2543 self._writinghandles = None
2516 2544
2517 2545 if dfh:
2518 2546 dfh.close()
2519 2547 ifh.close()
2520 2548 return not empty
2521 2549
2522 2550 def iscensored(self, rev):
2523 2551 """Check if a file revision is censored."""
2524 2552 if not self._censorable:
2525 2553 return False
2526 2554
2527 2555 return self.flags(rev) & REVIDX_ISCENSORED
2528 2556
2529 2557 def _peek_iscensored(self, baserev, delta, flush):
2530 2558 """Quickly check if a delta produces a censored revision."""
2531 2559 if not self._censorable:
2532 2560 return False
2533 2561
2534 2562 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2535 2563
2536 2564 def getstrippoint(self, minlink):
2537 2565 """find the minimum rev that must be stripped to strip the linkrev
2538 2566
2539 2567 Returns a tuple containing the minimum rev and a set of all revs that
2540 2568 have linkrevs that will be broken by this strip.
2541 2569 """
2542 2570 return storageutil.resolvestripinfo(
2543 2571 minlink,
2544 2572 len(self) - 1,
2545 2573 self.headrevs(),
2546 2574 self.linkrev,
2547 2575 self.parentrevs,
2548 2576 )
2549 2577
2550 2578 def strip(self, minlink, transaction):
2551 2579 """truncate the revlog on the first revision with a linkrev >= minlink
2552 2580
2553 2581 This function is called when we're stripping revision minlink and
2554 2582 its descendants from the repository.
2555 2583
2556 2584 We have to remove all revisions with linkrev >= minlink, because
2557 2585 the equivalent changelog revisions will be renumbered after the
2558 2586 strip.
2559 2587
2560 2588 So we truncate the revlog on the first of these revisions, and
2561 2589 trust that the caller has saved the revisions that shouldn't be
2562 2590 removed and that it'll re-add them after this truncation.
2563 2591 """
2564 2592 if len(self) == 0:
2565 2593 return
2566 2594
2567 2595 rev, _ = self.getstrippoint(minlink)
2568 2596 if rev == len(self):
2569 2597 return
2570 2598
2571 2599 # first truncate the files on disk
2572 2600 end = self.start(rev)
2573 2601 if not self._inline:
2574 2602 transaction.add(self.datafile, end)
2575 2603 end = rev * self._io.size
2576 2604 else:
2577 2605 end += rev * self._io.size
2578 2606
2579 2607 transaction.add(self.indexfile, end)
2580 2608
2581 2609 # then reset internal state in memory to forget those revisions
2582 2610 self._revisioncache = None
2583 2611 self._chaininfocache = util.lrucachedict(500)
2584 2612 self._chunkclear()
2585 2613
2586 2614 del self.index[rev:-1]
2587 2615
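The truncation offsets `strip` computes, as a worked instance for a non-inline revlog assuming a 64-byte index entry size (`self._io.size`):

iosize = 64
start = [0, 100, 250]          # toy .d start offset per revision

rev = 1                        # first revision being stripped
data_end = start[rev]          # keep bytes [0, 100) of the .d file
index_end = rev * iosize       # keep one full entry in the .i file
assert (data_end, index_end) == (100, 64)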
2588 2616 def checksize(self):
2589 2617 """Check size of index and data files
2590 2618
2591 2619 return a (dd, di) tuple.
2592 2620 - dd: extra bytes for the "data" file
2593 2621 - di: extra bytes for the "index" file
2594 2622
2595 2623 A healthy revlog will return (0, 0).
2596 2624 """
2597 2625 expected = 0
2598 2626 if len(self):
2599 2627 expected = max(0, self.end(len(self) - 1))
2600 2628
2601 2629 try:
2602 2630 with self._datafp() as f:
2603 2631 f.seek(0, io.SEEK_END)
2604 2632 actual = f.tell()
2605 2633 dd = actual - expected
2606 2634 except IOError as inst:
2607 2635 if inst.errno != errno.ENOENT:
2608 2636 raise
2609 2637 dd = 0
2610 2638
2611 2639 try:
2612 2640 f = self.opener(self.indexfile)
2613 2641 f.seek(0, io.SEEK_END)
2614 2642 actual = f.tell()
2615 2643 f.close()
2616 2644 s = self._io.size
2617 2645 i = max(0, actual // s)
2618 2646 di = actual - (i * s)
2619 2647 if self._inline:
2620 2648 databytes = 0
2621 2649 for r in self:
2622 2650 databytes += max(0, self.length(r))
2623 2651 dd = 0
2624 2652 di = actual - len(self) * s - databytes
2625 2653 except IOError as inst:
2626 2654 if inst.errno != errno.ENOENT:
2627 2655 raise
2628 2656 di = 0
2629 2657
2630 2658 return (dd, di)
2631 2659
2632 2660 def files(self):
2633 2661 res = [self.indexfile]
2634 2662 if not self._inline:
2635 2663 res.append(self.datafile)
2636 2664 return res
2637 2665
2638 2666 def emitrevisions(
2639 2667 self,
2640 2668 nodes,
2641 2669 nodesorder=None,
2642 2670 revisiondata=False,
2643 2671 assumehaveparentrevisions=False,
2644 2672 deltamode=repository.CG_DELTAMODE_STD,
2645 2673 ):
2646 2674 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2647 2675 raise error.ProgrammingError(
2648 2676 b'unhandled value for nodesorder: %s' % nodesorder
2649 2677 )
2650 2678
2651 2679 if nodesorder is None and not self._generaldelta:
2652 2680 nodesorder = b'storage'
2653 2681
2654 2682 if (
2655 2683 not self._storedeltachains
2656 2684 and deltamode != repository.CG_DELTAMODE_PREV
2657 2685 ):
2658 2686 deltamode = repository.CG_DELTAMODE_FULL
2659 2687
2660 2688 return storageutil.emitrevisions(
2661 2689 self,
2662 2690 nodes,
2663 2691 nodesorder,
2664 2692 revlogrevisiondelta,
2665 2693 deltaparentfn=self.deltaparent,
2666 2694 candeltafn=self.candelta,
2667 2695 rawsizefn=self.rawsize,
2668 2696 revdifffn=self.revdiff,
2669 2697 flagsfn=self.flags,
2670 2698 deltamode=deltamode,
2671 2699 revisiondata=revisiondata,
2672 2700 assumehaveparentrevisions=assumehaveparentrevisions,
2673 2701 )
2674 2702
2675 2703 DELTAREUSEALWAYS = b'always'
2676 2704 DELTAREUSESAMEREVS = b'samerevs'
2677 2705 DELTAREUSENEVER = b'never'
2678 2706
2679 2707 DELTAREUSEFULLADD = b'fulladd'
2680 2708
2681 2709 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2682 2710
2683 2711 def clone(
2684 2712 self,
2685 2713 tr,
2686 2714 destrevlog,
2687 2715 addrevisioncb=None,
2688 2716 deltareuse=DELTAREUSESAMEREVS,
2689 2717 forcedeltabothparents=None,
2690 2718 sidedatacompanion=None,
2691 2719 ):
2692 2720 """Copy this revlog to another, possibly with format changes.
2693 2721
2694 2722 The destination revlog will contain the same revisions and nodes.
2695 2723 However, it may not be bit-for-bit identical due to e.g. delta encoding
2696 2724 differences.
2697 2725
2698 2726 The ``deltareuse`` argument controls how deltas from the existing revlog
2699 2727 are preserved in the destination revlog. The argument can have the
2700 2728 following values:
2701 2729
2702 2730 DELTAREUSEALWAYS
2703 2731 Deltas will always be reused (if possible), even if the destination
2704 2732 revlog would not select the same revisions for the delta. This is the
2705 2733 fastest mode of operation.
2706 2734 DELTAREUSESAMEREVS
2707 2735 Deltas will be reused if the destination revlog would pick the same
2708 2736 revisions for the delta. This mode strikes a balance between speed
2709 2737 and optimization.
2710 2738 DELTAREUSENEVER
2711 2739 Deltas will never be reused. This is the slowest mode of execution.
2712 2740 This mode can be used to recompute deltas (e.g. if the diff/delta
2713 2741 algorithm changes).
2714 2742 DELTAREUSEFULLADD
2715 2743 Revisions will be re-added as if they were new content. This is
2716 2744 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2717 2745 e.g. large file detection and handling.
2718 2746
2719 2747 Delta computation can be slow, so the choice of delta reuse policy can
2720 2748 significantly affect run time.
2721 2749
2722 2750 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2723 2751 two extremes. Deltas will be reused if they are appropriate. But if the
2724 2752 delta could choose a better revision, it will do so. This means if you
2725 2753 are converting a non-generaldelta revlog to a generaldelta revlog,
2726 2754 deltas will be recomputed if the delta's parent isn't a parent of the
2727 2755 revision.
2728 2756
2729 2757 In addition to the delta policy, the ``forcedeltabothparents``
2730 2758 argument controls whether to force computing deltas against both
2731 2759 parents for merges. If None, the revlog's current default is used.
2732 2760
2733 2761 If not None, `sidedatacompanion` is a callable that accepts two
2734 2762 arguments:
2735 2763
2736 2764 (srcrevlog, rev)
2737 2765
2738 2766 and returns a quintet that controls changes to the sidedata content
2739 2767 old revision to the new clone result:
2740 2768
2741 2769 (dropall, filterout, update, new_flags, dropped_flags)
2742 2770
2743 2771 * if `dropall` is True, all sidedata should be dropped
2744 2772 * `filterout` is a set of sidedata keys that should be dropped
2745 2773 * `update` is a mapping of additional/new key -> value
2746 2774 * `new_flags` is a bitfield of new flags that the revision should get
2747 2775 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2748 2776 """
2749 2777 if deltareuse not in self.DELTAREUSEALL:
2750 2778 raise ValueError(
2751 2779 _(b'value for deltareuse invalid: %s') % deltareuse
2752 2780 )
2753 2781
2754 2782 if len(destrevlog):
2755 2783 raise ValueError(_(b'destination revlog is not empty'))
2756 2784
2757 2785 if getattr(self, 'filteredrevs', None):
2758 2786 raise ValueError(_(b'source revlog has filtered revisions'))
2759 2787 if getattr(destrevlog, 'filteredrevs', None):
2760 2788 raise ValueError(_(b'destination revlog has filtered revisions'))
2761 2789
2762 2790 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2763 2791 # if possible.
2764 2792 oldlazydelta = destrevlog._lazydelta
2765 2793 oldlazydeltabase = destrevlog._lazydeltabase
2766 2794 oldamd = destrevlog._deltabothparents
2767 2795
2768 2796 try:
2769 2797 if deltareuse == self.DELTAREUSEALWAYS:
2770 2798 destrevlog._lazydeltabase = True
2771 2799 destrevlog._lazydelta = True
2772 2800 elif deltareuse == self.DELTAREUSESAMEREVS:
2773 2801 destrevlog._lazydeltabase = False
2774 2802 destrevlog._lazydelta = True
2775 2803 elif deltareuse == self.DELTAREUSENEVER:
2776 2804 destrevlog._lazydeltabase = False
2777 2805 destrevlog._lazydelta = False
2778 2806
2779 2807 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2780 2808
2781 2809 self._clone(
2782 2810 tr,
2783 2811 destrevlog,
2784 2812 addrevisioncb,
2785 2813 deltareuse,
2786 2814 forcedeltabothparents,
2787 2815 sidedatacompanion,
2788 2816 )
2789 2817
2790 2818 finally:
2791 2819 destrevlog._lazydelta = oldlazydelta
2792 2820 destrevlog._lazydeltabase = oldlazydeltabase
2793 2821 destrevlog._deltabothparents = oldamd
2794 2822
2795 2823 def _clone(
2796 2824 self,
2797 2825 tr,
2798 2826 destrevlog,
2799 2827 addrevisioncb,
2800 2828 deltareuse,
2801 2829 forcedeltabothparents,
2802 2830 sidedatacompanion,
2803 2831 ):
2804 2832 """perform the core duty of `revlog.clone` after parameter processing"""
2805 2833 deltacomputer = deltautil.deltacomputer(destrevlog)
2806 2834 index = self.index
2807 2835 for rev in self:
2808 2836 entry = index[rev]
2809 2837
2810 2838 # Some classes override linkrev to take filtered revs into
2811 2839 # account. Use raw entry from index.
2812 2840 flags = entry[0] & 0xFFFF
2813 2841 linkrev = entry[4]
2814 2842 p1 = index[entry[5]][7]
2815 2843 p2 = index[entry[6]][7]
2816 2844 node = entry[7]
2817 2845
2818 2846 sidedataactions = (False, [], {}, 0, 0)
2819 2847 if sidedatacompanion is not None:
2820 2848 sidedataactions = sidedatacompanion(self, rev)
2821 2849
2822 2850 # (Possibly) reuse the delta from the revlog if allowed and
2823 2851 # the revlog chunk is a delta.
2824 2852 cachedelta = None
2825 2853 rawtext = None
2826 2854 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2827 2855 dropall = sidedataactions[0]
2828 2856 filterout = sidedataactions[1]
2829 2857 update = sidedataactions[2]
2830 2858 new_flags = sidedataactions[3]
2831 2859 dropped_flags = sidedataactions[4]
2832 2860 text, sidedata = self._revisiondata(rev)
2833 2861 if dropall:
2834 2862 sidedata = {}
2835 2863 for key in filterout:
2836 2864 sidedata.pop(key, None)
2837 2865 sidedata.update(update)
2838 2866 if not sidedata:
2839 2867 sidedata = None
2840 2868
2841 2869 flags |= new_flags
2842 2870 flags &= ~dropped_flags
2843 2871
2844 2872 destrevlog.addrevision(
2845 2873 text,
2846 2874 tr,
2847 2875 linkrev,
2848 2876 p1,
2849 2877 p2,
2850 2878 cachedelta=cachedelta,
2851 2879 node=node,
2852 2880 flags=flags,
2853 2881 deltacomputer=deltacomputer,
2854 2882 sidedata=sidedata,
2855 2883 )
2856 2884 else:
2857 2885 if destrevlog._lazydelta:
2858 2886 dp = self.deltaparent(rev)
2859 2887 if dp != nullrev:
2860 2888 cachedelta = (dp, bytes(self._chunk(rev)))
2861 2889
2862 2890 if not cachedelta:
2863 2891 rawtext = self.rawdata(rev)
2864 2892
2865 2893 ifh = destrevlog.opener(
2866 2894 destrevlog.indexfile, b'a+', checkambig=False
2867 2895 )
2868 2896 dfh = None
2869 2897 if not destrevlog._inline:
2870 2898 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2871 2899 try:
2872 2900 destrevlog._addrevision(
2873 2901 node,
2874 2902 rawtext,
2875 2903 tr,
2876 2904 linkrev,
2877 2905 p1,
2878 2906 p2,
2879 2907 flags,
2880 2908 cachedelta,
2881 2909 ifh,
2882 2910 dfh,
2883 2911 deltacomputer=deltacomputer,
2884 2912 )
2885 2913 finally:
2886 2914 if dfh:
2887 2915 dfh.close()
2888 2916 ifh.close()
2889 2917
2890 2918 if addrevisioncb:
2891 2919 addrevisioncb(self, rev, node)
2892 2920
2893 2921 def censorrevision(self, tr, censornode, tombstone=b''):
2894 2922 if (self.version & 0xFFFF) == REVLOGV0:
2895 2923 raise error.RevlogError(
2896 2924 _(b'cannot censor with version %d revlogs') % self.version
2897 2925 )
2898 2926
2899 2927 censorrev = self.rev(censornode)
2900 2928 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2901 2929
2902 2930 if len(tombstone) > self.rawsize(censorrev):
2903 2931 raise error.Abort(
2904 2932 _(b'censor tombstone must be no longer than censored data')
2905 2933 )
2906 2934
2907 2935 # Rewriting the revlog in place is hard. Our strategy for censoring is
2908 2936 # to create a new revlog, copy all revisions to it, then replace the
2909 2937 # revlogs on transaction close.
2910 2938
2911 2939 newindexfile = self.indexfile + b'.tmpcensored'
2912 2940 newdatafile = self.datafile + b'.tmpcensored'
2913 2941
2914 2942 # This is a bit dangerous. We could easily have a mismatch of state.
2915 2943 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2916 2944 newrl.version = self.version
2917 2945 newrl._generaldelta = self._generaldelta
2918 2946 newrl._io = self._io
2919 2947
2920 2948 for rev in self.revs():
2921 2949 node = self.node(rev)
2922 2950 p1, p2 = self.parents(node)
2923 2951
2924 2952 if rev == censorrev:
2925 2953 newrl.addrawrevision(
2926 2954 tombstone,
2927 2955 tr,
2928 2956 self.linkrev(censorrev),
2929 2957 p1,
2930 2958 p2,
2931 2959 censornode,
2932 2960 REVIDX_ISCENSORED,
2933 2961 )
2934 2962
2935 2963 if newrl.deltaparent(rev) != nullrev:
2936 2964 raise error.Abort(
2937 2965 _(
2938 2966 b'censored revision stored as delta; '
2939 2967 b'cannot censor'
2940 2968 ),
2941 2969 hint=_(
2942 2970 b'censoring of revlogs is not '
2943 2971 b'fully implemented; please report '
2944 2972 b'this bug'
2945 2973 ),
2946 2974 )
2947 2975 continue
2948 2976
2949 2977 if self.iscensored(rev):
2950 2978 if self.deltaparent(rev) != nullrev:
2951 2979 raise error.Abort(
2952 2980 _(
2953 2981 b'cannot censor due to censored '
2954 2982 b'revision having delta stored'
2955 2983 )
2956 2984 )
2957 2985 rawtext = self._chunk(rev)
2958 2986 else:
2959 2987 rawtext = self.rawdata(rev)
2960 2988
2961 2989 newrl.addrawrevision(
2962 2990 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2963 2991 )
2964 2992
2965 2993 tr.addbackup(self.indexfile, location=b'store')
2966 2994 if not self._inline:
2967 2995 tr.addbackup(self.datafile, location=b'store')
2968 2996
2969 2997 self.opener.rename(newrl.indexfile, self.indexfile)
2970 2998 if not self._inline:
2971 2999 self.opener.rename(newrl.datafile, self.datafile)
2972 3000
2973 3001 self.clearcaches()
2974 3002 self._loadindex()
2975 3003
2976 3004 def verifyintegrity(self, state):
2977 3005 """Verifies the integrity of the revlog.
2978 3006
2979 3007 Yields ``revlogproblem`` instances describing problems that are
2980 3008 found.
2981 3009 """
2982 3010 dd, di = self.checksize()
2983 3011 if dd:
2984 3012 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2985 3013 if di:
2986 3014 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2987 3015
2988 3016 version = self.version & 0xFFFF
2989 3017
2990 3018 # The verifier tells us what version revlog we should be.
2991 3019 if version != state[b'expectedversion']:
2992 3020 yield revlogproblem(
2993 3021 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2994 3022 % (self.indexfile, version, state[b'expectedversion'])
2995 3023 )
2996 3024
2997 3025 state[b'skipread'] = set()
2998 3026 state[b'safe_renamed'] = set()
2999 3027
3000 3028 for rev in self:
3001 3029 node = self.node(rev)
3002 3030
3003 3031 # Verify contents. 4 cases to care about:
3004 3032 #
3005 3033 # common: the most common case
3006 3034 # rename: with a rename
3007 3035 # meta: file content starts with b'\1\n', the metadata
3008 3036 # header defined in filelog.py, but without a rename
3009 3037 # ext: content stored externally
3010 3038 #
3011 3039 # More formally, their differences are shown below:
3012 3040 #
3013 3041 # | common | rename | meta | ext
3014 3042 # -------------------------------------------------------
3015 3043 # flags() | 0 | 0 | 0 | not 0
3016 3044 # renamed() | False | True | False | ?
3017 3045 # rawtext[0:2]=='\1\n'| False | True | True | ?
3018 3046 #
3019 3047 # "rawtext" means the raw text stored in revlog data, which
3020 3048 # could be retrieved by "rawdata(rev)". "text"
3021 3049 # mentioned below is "revision(rev)".
3022 3050 #
3023 3051 # There are 3 different lengths stored physically:
3024 3052 # 1. L1: rawsize, stored in revlog index
3025 3053 # 2. L2: len(rawtext), stored in revlog data
3026 3054 # 3. L3: len(text), stored in revlog data if flags==0, or
3027 3055 # possibly somewhere else if flags!=0
3028 3056 #
3029 3057 # L1 should be equal to L2. L3 could be different from them.
3030 3058 # "text" may or may not affect commit hash depending on flag
3031 3059 # processors (see flagutil.addflagprocessor).
3032 3060 #
3033 3061 # | common | rename | meta | ext
3034 3062 # -------------------------------------------------
3035 3063 # rawsize() | L1 | L1 | L1 | L1
3036 3064 # size() | L1 | L2-LM | L1(*) | L1 (?)
3037 3065 # len(rawtext) | L2 | L2 | L2 | L2
3038 3066 # len(text) | L2 | L2 | L2 | L3
3039 3067 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3040 3068 #
3041 3069 # LM: length of metadata, depending on rawtext
3042 3070 # (*): not ideal, see comment in filelog.size
3043 3071 # (?): could be "- len(meta)" if the resolved content has
3044 3072 # rename metadata
3045 3073 #
3046 3074 # Checks that need to be done:
3047 3075 # 1. length check: L1 == L2, in all cases.
3048 3076 # 2. hash check: depending on flag processor, we may need to
3049 3077 # use either "text" (external), or "rawtext" (in revlog).
3050 3078
3051 3079 try:
3052 3080 skipflags = state.get(b'skipflags', 0)
3053 3081 if skipflags:
3054 3082 skipflags &= self.flags(rev)
3055 3083
3056 3084 _verify_revision(self, skipflags, state, node)
3057 3085
3058 3086 l1 = self.rawsize(rev)
3059 3087 l2 = len(self.rawdata(node))
3060 3088
3061 3089 if l1 != l2:
3062 3090 yield revlogproblem(
3063 3091 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3064 3092 node=node,
3065 3093 )
3066 3094
3067 3095 except error.CensoredNodeError:
3068 3096 if state[b'erroroncensored']:
3069 3097 yield revlogproblem(
3070 3098 error=_(b'censored file data'), node=node
3071 3099 )
3072 3100 state[b'skipread'].add(node)
3073 3101 except Exception as e:
3074 3102 yield revlogproblem(
3075 3103 error=_(b'unpacking %s: %s')
3076 3104 % (short(node), stringutil.forcebytestr(e)),
3077 3105 node=node,
3078 3106 )
3079 3107 state[b'skipread'].add(node)
3080 3108
3081 3109 def storageinfo(
3082 3110 self,
3083 3111 exclusivefiles=False,
3084 3112 sharedfiles=False,
3085 3113 revisionscount=False,
3086 3114 trackedsize=False,
3087 3115 storedsize=False,
3088 3116 ):
3089 3117 d = {}
3090 3118
3091 3119 if exclusivefiles:
3092 3120 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3093 3121 if not self._inline:
3094 3122 d[b'exclusivefiles'].append((self.opener, self.datafile))
3095 3123
3096 3124 if sharedfiles:
3097 3125 d[b'sharedfiles'] = []
3098 3126
3099 3127 if revisionscount:
3100 3128 d[b'revisionscount'] = len(self)
3101 3129
3102 3130 if trackedsize:
3103 3131 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3104 3132
3105 3133 if storedsize:
3106 3134 d[b'storedsize'] = sum(
3107 3135 self.opener.stat(path).st_size for path in self.files()
3108 3136 )
3109 3137
3110 3138 return d
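
Aside: a hedged sketch of querying the optional storage statistics above
(`rl` is assumed to be an open revlog):

    info = rl.storageinfo(revisionscount=True, trackedsize=True)
    revs = info[b'revisionscount']   # number of revisions in the revlog
    size = info[b'trackedsize']      # sum of rawsize over all revisions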
@@ -1,60 +1,59 b''
1 1 # revlogdeltas.py - constants used for revlog logic
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 from ..interfaces import repository
13 13
14 14 # revlog header flags
15 15 REVLOGV0 = 0
16 16 REVLOGV1 = 1
17 17 # Dummy value until file format is finalized.
18 # Reminder: change the bounds check in revlog.__init__ when this is changed.
19 18 REVLOGV2 = 0xDEAD
20 19 # Shared across v1 and v2.
21 20 FLAG_INLINE_DATA = 1 << 16
22 21 # Only used by v1, implied by v2.
23 22 FLAG_GENERALDELTA = 1 << 17
24 23 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
25 24 REVLOG_DEFAULT_FORMAT = REVLOGV1
26 25 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
27 26 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
28 27 REVLOGV2_FLAGS = FLAG_INLINE_DATA
29 28
30 29 # revlog index flags
31 30
32 31 # For historical reasons, revlog's internal flags were exposed via the
33 32 # wire protocol and are even exposed in parts of the storage APIs.
34 33
35 34 # revision has censor metadata, must be verified
36 35 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
37 36 # revision hash does not match data (narrowhg)
38 37 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
39 38 # revision data is stored externally
40 39 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
41 40 # revision data contains extra metadata not part of the official digest
42 41 REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
43 42 # revision changes files in a way that could affect copy tracing.
44 43 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
45 44 REVIDX_DEFAULT_FLAGS = 0
46 45 # stable order in which flags need to be processed and their processors applied
47 46 REVIDX_FLAGS_ORDER = [
48 47 REVIDX_ISCENSORED,
49 48 REVIDX_ELLIPSIS,
50 49 REVIDX_EXTSTORED,
51 50 REVIDX_SIDEDATA,
52 51 REVIDX_HASCOPIESINFO,
53 52 ]
54 53
56 55 # bitmask of flags that could cause rawdata content change
56 55 REVIDX_RAWTEXT_CHANGING_FLAGS = (
57 56 REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
58 57 )
59 58
60 59 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
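
For reference, a small self-contained sketch of how these constants pack
into the 4-byte revlog header (matching the `00 01 de ad` bytes the
updated tests expect for v2; the variable bindings merely restate the
constants above):

    import struct

    FLAG_INLINE_DATA = 1 << 16
    REVLOGV2 = 0xDEAD

    header = FLAG_INLINE_DATA | REVLOGV2       # 0x0001DEAD
    assert struct.pack('>I', header) == b'\x00\x01\xde\xad'
    assert header & 0xFFFF == REVLOGV2         # low 16 bits: version
    assert header >> 16 == 1                   # high 16 bits: flags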
@@ -1,284 +1,284 b''
1 1 """This unit test primarily tests parsers.parse_index2().
2 2
3 3 It also checks certain aspects of the parsers module as a whole.
4 4 """
5 5
6 6 from __future__ import absolute_import, print_function
7 7
8 8 import os
9 9 import struct
10 10 import subprocess
11 11 import sys
12 12 import unittest
13 13
14 14 from mercurial.node import (
15 15 bin,
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 )
20 20 from mercurial import (
21 21 policy,
22 22 pycompat,
23 23 )
24 24
25 25 parsers = policy.importmod('parsers')
26 26
27 27 # original python implementation
28 28 def gettype(q):
29 29 return int(q & 0xFFFF)
30 30
31 31
32 32 def offset_type(offset, type):
33 33 return int(int(offset) << 16 | type)
34 34
35 35
36 36 indexformatng = ">Qiiiiii20s12x"
37 37
38 38
39 39 def py_parseindex(data, inline):
40 40 s = 64
41 41 cache = None
42 42 index = []
43 43 nodemap = {nullid: nullrev}
44 44 n = off = 0
45 45
46 46 l = len(data) - s
47 47 append = index.append
48 48 if inline:
49 49 cache = (0, data)
50 50 while off <= l:
51 51 e = struct.unpack(indexformatng, data[off : off + s])
52 52 nodemap[e[7]] = n
53 53 append(e)
54 54 n += 1
55 55 if e[1] < 0:
56 56 break
57 57 off += e[1] + s
58 58 else:
59 59 while off <= l:
60 60 e = struct.unpack(indexformatng, data[off : off + s])
61 61 nodemap[e[7]] = n
62 62 append(e)
63 63 n += 1
64 64 off += s
65 65
66 66 e = list(index[0])
67 67 type = gettype(e[0])
68 68 e[0] = offset_type(0, type)
69 69 index[0] = tuple(e)
70 70
71 71 return index, cache
72 72
73 73
74 74 data_inlined = (
75 75 b'\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c'
76 76 b'\x00\x00\x04\x07\x00\x00\x00\x00\x00\x00\x15\x15\xff\xff\xff'
77 77 b'\xff\xff\xff\xff\xff\xebG\x97\xb7\x1fB\x04\xcf\x13V\x81\tw\x1b'
78 78 b'w\xdduR\xda\xc6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
79 79 b'x\x9c\x9d\x93?O\xc30\x10\xc5\xf7|\x8a\xdb\x9a\xa8m\x06\xd8*\x95'
80 80 b'\x81B\xa1\xa2\xa2R\xcb\x86Pd\x9a\x0b5$vd_\x04\xfd\xf6\x9c\xff@'
81 81 b'\x11!\x0b\xd9\xec\xf7\xbbw\xe7gG6\xad6\x04\xdaN\xc0\x92\xa0$)'
82 82 b'\xb1\x82\xa2\xd1%\x16\xa4\x8b7\xa9\xca\xd4-\xb2Y\x02\xfc\xc9'
83 83 b'\xcaS\xf9\xaeX\xed\xb6\xd77Q\x02\x83\xd4\x19\xf5--Y\xea\xe1W'
84 84 b'\xab\xed\x10\xceR\x0f_\xdf\xdf\r\xe1,\xf5\xf0\xcb\xf5 \xceR\x0f'
85 85 b'_\xdc\x0e\x0e\xc3R\x0f_\xae\x96\x9b!\x9e\xa5\x1e\xbf\xdb,\x06'
86 86 b'\xc7q\x9a/\x88\x82\xc3B\xea\xb5\xb4TJ\x93\xb6\x82\x0e\xe16\xe6'
87 87 b'KQ\xdb\xaf\xecG\xa3\xd1 \x01\xd3\x0b_^\xe8\xaa\xa0\xae\xad\xd1'
88 88 b'&\xbef\x1bz\x08\xb0|\xc9Xz\x06\xf6Z\x91\x90J\xaa\x17\x90\xaa'
89 89 b'\xd2\xa6\x11$5C\xcf\xba#\xa0\x03\x02*2\x92-\xfc\xb1\x94\xdf\xe2'
90 90 b'\xae\xb8\'m\x8ey0^\x85\xd3\x82\xb4\xf0`:\x9c\x00\x8a\xfd\x01'
91 91 b'\xb0\xc6\x86\x8b\xdd\xae\x80\xf3\xa9\x9fd\x16\n\x00R%\x1a\x06'
92 92 b'\xe9\xd8b\x98\x1d\xf4\xf3+\x9bf\x01\xd8p\x1b\xf3.\xed\x9f^g\xc3'
93 93 b'^\xd9W81T\xdb\xd5\x04sx|\xf2\xeb\xd6`%?x\xed"\x831\xbf\xf3\xdc'
94 94 b'b\xeb%gaY\xe1\xad\x9f\xb9f\'1w\xa9\xa5a\x83s\x82J\xb98\xbc4\x8b'
95 95 b'\x83\x00\x9f$z\xb8#\xa5\xb1\xdf\x98\xd9\xec\x1b\x89O\xe3Ts\x9a4'
96 96 b'\x17m\x8b\xfc\x8f\xa5\x95\x9a\xfc\xfa\xed,\xe5|\xa1\xfe\x15\xb9'
97 97 b'\xbc\xb2\x93\x1f\xf2\x95\xff\xdf,\x1a\xc5\xe7\x17*\x93Oz:>\x0e'
98 98 )
99 99
100 100 data_non_inlined = (
101 101 b'\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01D\x19'
102 102 b'\x00\x07e\x12\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff'
103 103 b'\xff\xff\xff\xff\xd1\xf4\xbb\xb0\xbe\xfc\x13\xbd\x8c\xd3\x9d'
104 104 b'\x0f\xcd\xd9;\x8c\x07\x8cJ/\x00\x00\x00\x00\x00\x00\x00\x00\x00'
105 105 b'\x00\x00\x00\x00\x00\x00\x01D\x19\x00\x00\x00\x00\x00\xdf\x00'
106 106 b'\x00\x01q\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\xff'
107 107 b'\xff\xff\xff\xc1\x12\xb9\x04\x96\xa4Z1t\x91\xdfsJ\x90\xf0\x9bh'
108 108 b'\x07l&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
109 109 b'\x00\x01D\xf8\x00\x00\x00\x00\x01\x1b\x00\x00\x01\xb8\x00\x00'
110 110 b'\x00\x01\x00\x00\x00\x02\x00\x00\x00\x01\xff\xff\xff\xff\x02\n'
111 111 b'\x0e\xc6&\xa1\x92\xae6\x0b\x02i\xfe-\xe5\xbao\x05\xd1\xe7\x00'
112 112 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01F'
113 113 b'\x13\x00\x00\x00\x00\x01\xec\x00\x00\x03\x06\x00\x00\x00\x01'
114 114 b'\x00\x00\x00\x03\x00\x00\x00\x02\xff\xff\xff\xff\x12\xcb\xeby1'
115 115 b'\xb6\r\x98B\xcb\x07\xbd`\x8f\x92\xd9\xc4\x84\xbdK\x00\x00\x00'
116 116 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00'
117 117 )
118 118
119 119
120 def parse_index2(data, inline):
121 index, chunkcache = parsers.parse_index2(data, inline)
120 def parse_index2(data, inline, revlogv2=False):
121 index, chunkcache = parsers.parse_index2(data, inline, revlogv2=revlogv2)
122 122 return list(index), chunkcache
123 123
124 124
125 125 def importparsers(hexversion):
126 126 """Import mercurial.parsers with the given sys.hexversion."""
127 127 # The file parsers.c inspects sys.hexversion to determine the version
128 128 # of the currently-running Python interpreter, so we monkey-patch
129 129 # sys.hexversion to simulate using different versions.
130 130 code = (
131 131 "import sys; sys.hexversion=%s; "
132 132 "import mercurial.cext.parsers" % hexversion
133 133 )
134 134 cmd = "\"%s\" -c \"%s\"" % (os.environ['PYTHON'], code)
135 135 # We need to do these tests inside a subprocess because parser.c's
136 136 # version-checking code happens inside the module init function, and
137 137 # when using reload() to reimport an extension module, "The init function
138 138 # of extension modules is not called a second time"
139 139 # (from http://docs.python.org/2/library/functions.html?#reload).
140 140 p = subprocess.Popen(
141 141 cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
142 142 )
143 143 return p.communicate() # returns stdout, stderr
144 144
145 145
146 146 def hexfailmsg(testnumber, hexversion, stdout, expected):
147 147 try:
148 148 hexstring = hex(hexversion)
149 149 except TypeError:
150 150 hexstring = None
151 151 return (
152 152 "FAILED: version test #%s with Python %s and patched "
153 153 "sys.hexversion %r (%r):\n Expected %s but got:\n-->'%s'\n"
154 154 % (
155 155 testnumber,
156 156 sys.version_info,
157 157 hexversion,
158 158 hexstring,
159 159 expected,
160 160 stdout,
161 161 )
162 162 )
163 163
164 164
165 165 def makehex(major, minor, micro):
166 166 return int("%x%02x%02x00" % (major, minor, micro), 16)
167 167
168 168
169 169 class parseindex2tests(unittest.TestCase):
170 170 def assertversionokay(self, testnumber, hexversion):
171 171 stdout, stderr = importparsers(hexversion)
172 172 self.assertFalse(
173 173 stdout, hexfailmsg(testnumber, hexversion, stdout, 'no stdout')
174 174 )
175 175
176 176 def assertversionfail(self, testnumber, hexversion):
177 177 stdout, stderr = importparsers(hexversion)
178 178 # We include versionerrortext to distinguish from other ImportErrors.
179 179 errtext = b"ImportError: %s" % pycompat.sysbytes(
180 180 parsers.versionerrortext
181 181 )
182 182 self.assertIn(
183 183 errtext,
184 184 stdout,
185 185 hexfailmsg(
186 186 testnumber,
187 187 hexversion,
188 188 stdout,
189 189 expected="stdout to contain %r" % errtext,
190 190 ),
191 191 )
192 192
193 193 def testversiondetection(self):
194 194 """Check the version-detection logic when importing parsers."""
195 195 # Only test the version-detection logic if it is present.
196 196 try:
197 197 parsers.versionerrortext
198 198 except AttributeError:
199 199 return
200 200 info = sys.version_info
201 201 major, minor, micro = info[0], info[1], info[2]
202 202 # Test same major-minor versions.
203 203 self.assertversionokay(1, makehex(major, minor, micro))
204 204 self.assertversionokay(2, makehex(major, minor, micro + 1))
205 205 # Test different major-minor versions.
206 206 self.assertversionfail(3, makehex(major + 1, minor, micro))
207 207 self.assertversionfail(4, makehex(major, minor + 1, micro))
208 208 self.assertversionfail(5, "'foo'")
209 209
210 210 def testbadargs(self):
211 211 # Check that parse_index2() raises TypeError on bad arguments.
212 212 with self.assertRaises(TypeError):
213 213 parse_index2(0, True)
214 214
215 215 def testparseindexfile(self):
216 216 # Check parsers.parse_index2() on an index file against the
217 217 # original Python implementation of parseindex, both with and
218 218 # without inlined data.
219 219
220 220 want = py_parseindex(data_inlined, True)
221 221 got = parse_index2(data_inlined, True)
222 222 self.assertEqual(want, got) # inline data
223 223
224 224 want = py_parseindex(data_non_inlined, False)
225 225 got = parse_index2(data_non_inlined, False)
226 226 self.assertEqual(want, got) # no inline data
227 227
228 228 ix = parsers.parse_index2(data_inlined, True)[0]
229 229 for i, r in enumerate(ix):
230 230 if r[7] == nullid:
231 231 i = -1
232 232 try:
233 233 self.assertEqual(
234 234 ix[r[7]],
235 235 i,
236 236 'Reverse lookup inconsistent for %r' % hex(r[7]),
237 237 )
238 238 except TypeError:
239 239 # pure version doesn't support this
240 240 break
241 241
242 242 def testminusone(self):
243 243 want = (0, 0, 0, -1, -1, -1, -1, nullid)
244 244 index, junk = parsers.parse_index2(data_inlined, True)
245 245 got = index[-1]
246 246 self.assertEqual(want, got) # inline data
247 247
248 248 index, junk = parsers.parse_index2(data_non_inlined, False)
249 249 got = index[-1]
250 250 self.assertEqual(want, got) # no inline data
251 251
252 252 def testdelitemwithoutnodetree(self):
253 253 index, _junk = parsers.parse_index2(data_non_inlined, False)
254 254
255 255 def hexrev(rev):
256 256 if rev == nullrev:
257 257 return b'\xff\xff\xff\xff'
258 258 else:
259 259 return bin('%08x' % rev)
260 260
261 261 def appendrev(p1, p2=nullrev):
262 262 # node won't matter for this test, let's just make sure
263 263 # they don't collide. Other data don't matter either.
264 264 node = hexrev(p1) + hexrev(p2) + b'.' * 12
265 265 index.append((0, 0, 12, 1, 34, p1, p2, node))
266 266
267 267 appendrev(4)
268 268 appendrev(5)
269 269 appendrev(6)
270 270 self.assertEqual(len(index), 7)
271 271
272 272 del index[1:-1]
273 273
274 274 # assertions that failed before correction
275 275 self.assertEqual(len(index), 1) # was 4
276 276 headrevs = getattr(index, 'headrevs', None)
277 277 if headrevs is not None: # not implemented in pure
278 278 self.assertEqual(index.headrevs(), [0]) # gave ValueError
279 279
280 280
281 281 if __name__ == '__main__':
282 282 import silenttestrunner
283 283
284 284 silenttestrunner.main(__name__)
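
As a companion to the test above, a hedged sketch of decoding a single
64-byte index entry with the same format string (the field names are
descriptive, not part of the module):

    import struct

    indexformatng = ">Qiiiiii20s12x"
    assert struct.calcsize(indexformatng) == 64

    def decode_entry(data, off=0):
        (offset_flags, comp_len, raw_len, base_rev,
         link_rev, p1, p2, nodeid) = struct.unpack(
            indexformatng, data[off:off + 64])
        # the first field packs the data offset (high bits) together
        # with the low 16-bit per-revision flags
        return offset_flags >> 16, offset_flags & 0xFFFF, nodeid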
@@ -1,65 +1,65 b''
1 1 #require reporevlogstore
2 2
3 3 A repo with unknown revlogv2 requirement string cannot be opened
4 4
5 5 $ hg init invalidreq
6 6 $ cd invalidreq
7 7 $ echo exp-revlogv2.unknown >> .hg/requires
8 8 $ hg log
9 9 abort: repository requires features unknown to this Mercurial: exp-revlogv2.unknown
10 10 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
11 11 [255]
12 12 $ cd ..
13 13
14 14 Can create and open repo with revlog v2 requirement
15 15
16 16 $ cat >> $HGRCPATH << EOF
17 17 > [experimental]
18 18 > revlogv2 = enable-unstable-format-and-corrupt-my-data
19 19 > EOF
20 20
21 21 $ hg init empty-repo
22 22 $ cd empty-repo
23 23 $ cat .hg/requires
24 24 dotencode
25 exp-revlogv2.1
25 exp-revlogv2.2
26 26 fncache
27 27 sparserevlog
28 28 store
29 29
30 30 $ hg log
31 31
32 32 Unknown flags to revlog are rejected
33 33
34 34 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
35 35 ... fh.write(b'\xff\x00\xde\xad') and None
36 36
37 37 $ hg log
38 38 abort: unknown flags (0xff00) in version 57005 revlog 00changelog.i
39 39 [50]
40 40
41 41 $ cd ..
42 42
43 43 Writing a simple revlog v2 works
44 44
45 45 $ hg init simple
46 46 $ cd simple
47 47 $ touch foo
48 48 $ hg -q commit -A -m initial
49 49
50 50 $ hg log
51 51 changeset: 0:96ee1d7354c4
52 52 tag: tip
53 53 user: test
54 54 date: Thu Jan 01 00:00:00 1970 +0000
55 55 summary: initial
56 56
57 57 Header written as expected
58 58
59 59 $ f --hexdump --bytes 4 .hg/store/00changelog.i
60 60 .hg/store/00changelog.i:
61 61 0000: 00 01 de ad |....|
62 62
63 63 $ f --hexdump --bytes 4 .hg/store/data/foo.i
64 64 .hg/store/data/foo.i:
65 65 0000: 00 01 de ad |....|
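
A quick way to sanity-check those header bytes (a standalone sketch, not
part of the test suite):

    >>> import struct
    >>> header = struct.unpack('>I', b'\x00\x01\xde\xad')[0]
    >>> hex(header & 0xFFFF)   # version: REVLOGV2
    '0xdead'
    >>> header >> 16           # flags: FLAG_INLINE_DATA
    1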
@@ -1,53 +1,53 b''
1 1 $ hg init empty-repo
2 2 $ cd empty-repo
3 3
4 4 Flags on revlog version 0 are rejected
5 5
6 6 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
7 7 ... fh.write(b'\x00\x01\x00\x00') and None
8 8
9 9 $ hg log
10 10 abort: unknown flags (0x01) in version 0 revlog 00changelog.i
11 11 [50]
12 12
13 13 Unknown flags on revlog version 1 are rejected
14 14
15 15 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
16 16 ... fh.write(b'\x00\x04\x00\x01') and None
17 17
18 18 $ hg log
19 19 abort: unknown flags (0x04) in version 1 revlog 00changelog.i
20 20 [50]
21 21
22 22 Unknown version is rejected
23 23
24 24 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
25 ... fh.write(b'\x00\x00\x00\x02') and None
25 ... fh.write(b'\x00\x00\xbe\xef') and None
26 26
27 27 $ hg log
28 abort: unknown version (2) in revlog 00changelog.i
28 abort: unknown version (48879) in revlog 00changelog.i
29 29 [50]
30 30
31 31 $ cd ..
32 32
33 33 Test for CVE-2016-3630
34 34
35 35 $ hg init
36 36
37 37 >>> import codecs
38 38 >>> open("a.i", "wb").write(codecs.decode(codecs.decode(
39 39 ... b"""eJxjYGZgZIAAYQYGxhgom+k/FMx8YKx9ZUaKSOyqo4cnuKb8mbqHV5cBCVTMWb1Cwqkhe4Gsg9AD
40 40 ... Joa3dYtcYYYBAQ8Qr4OqZAYRICPTSr5WKd/42rV36d+8/VmrNpv7NP1jQAXrQE4BqQUARngwVA==""",
41 41 ... "base64"), "zlib")) and None
42 42
43 43 $ hg debugrevlogindex a.i
44 44 rev linkrev nodeid p1 p2
45 45 0 2 99e0332bd498 000000000000 000000000000
46 46 1 3 6674f57a23d8 99e0332bd498 000000000000
47 47
48 48 >>> from mercurial import revlog, vfs
49 49 >>> tvfs = vfs.vfs(b'.')
50 50 >>> tvfs.options = {b'revlogv1': True}
51 51 >>> rl = revlog.revlog(tvfs, b'a.i')
52 52 >>> rl.revision(1)
53 53 mpatchError(*'patch cannot be decoded'*) (glob)