##// END OF EJS Templates
revlogutils: move the NodeMap class in a dedicated nodemap module...
marmoute -
r44486:ab595920 default
parent child Browse files
Show More
@@ -0,0 +1,15
1 # nodemap.py - nodemap related code and utilities
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 #
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
8
9 from __future__ import absolute_import
10 from .. import error
11
12
13 class NodeMap(dict):
14 def __missing__(self, x):
15 raise error.RevlogError(b'unknown node: %s' % x)
@@ -1,235 +1,236
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid, nullrev
14 14 from .. import (
15 15 pycompat,
16 revlogutils,
17 16 util,
18 17 )
19 18
19 from ..revlogutils import nodemap as nodemaputil
20
20 21 stringio = pycompat.bytesio
21 22
22 23
23 24 _pack = struct.pack
24 25 _unpack = struct.unpack
25 26 _compress = zlib.compress
26 27 _decompress = zlib.decompress
27 28
28 29 # Some code below makes tuples directly because it's more convenient. However,
29 30 # code outside this module should always use dirstatetuple.
30 31 def dirstatetuple(*x):
31 32 # x is a tuple
32 33 return x
33 34
34 35
35 36 indexformatng = b">Qiiiiii20s12x"
36 37 indexfirst = struct.calcsize(b'Q')
37 38 sizeint = struct.calcsize(b'i')
38 39 indexsize = struct.calcsize(indexformatng)
39 40
40 41
41 42 def gettype(q):
42 43 return int(q & 0xFFFF)
43 44
44 45
45 46 def offset_type(offset, type):
46 47 return int(int(offset) << 16 | type)
47 48
48 49
49 50 class BaseIndexObject(object):
50 51 @property
51 52 def nodemap(self):
52 53 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
53 54 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
54 55 return self._nodemap
55 56
56 57 @util.propertycache
57 58 def _nodemap(self):
58 nodemap = revlogutils.NodeMap({nullid: nullrev})
59 nodemap = nodemaputil.NodeMap({nullid: nullrev})
59 60 for r in range(0, len(self)):
60 61 n = self[r][7]
61 62 nodemap[n] = r
62 63 return nodemap
63 64
64 65 def has_node(self, node):
65 66 """return True if the node exist in the index"""
66 67 return node in self._nodemap
67 68
68 69 def rev(self, node):
69 70 """return a revision for a node
70 71
71 72 If the node is unknown, raise a RevlogError"""
72 73 return self._nodemap[node]
73 74
74 75 def get_rev(self, node):
75 76 """return a revision for a node
76 77
77 78 If the node is unknown, return None"""
78 79 return self._nodemap.get(node)
79 80
80 81 def _stripnodes(self, start):
81 82 if '_nodemap' in vars(self):
82 83 for r in range(start, len(self)):
83 84 n = self[r][7]
84 85 del self._nodemap[n]
85 86
86 87 def clearcaches(self):
87 88 self.__dict__.pop('_nodemap', None)
88 89
89 90 def __len__(self):
90 91 return self._lgt + len(self._extra)
91 92
92 93 def append(self, tup):
93 94 if '_nodemap' in vars(self):
94 95 self._nodemap[tup[7]] = len(self)
95 96 self._extra.append(tup)
96 97
97 98 def _check_index(self, i):
98 99 if not isinstance(i, int):
99 100 raise TypeError(b"expecting int indexes")
100 101 if i < 0 or i >= len(self):
101 102 raise IndexError
102 103
103 104 def __getitem__(self, i):
104 105 if i == -1:
105 106 return (0, 0, 0, -1, -1, -1, -1, nullid)
106 107 self._check_index(i)
107 108 if i >= self._lgt:
108 109 return self._extra[i - self._lgt]
109 110 index = self._calculate_index(i)
110 111 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
111 112 if i == 0:
112 113 e = list(r)
113 114 type = gettype(e[0])
114 115 e[0] = offset_type(0, type)
115 116 return tuple(e)
116 117 return r
117 118
118 119
119 120 class IndexObject(BaseIndexObject):
120 121 def __init__(self, data):
121 122 assert len(data) % indexsize == 0
122 123 self._data = data
123 124 self._lgt = len(data) // indexsize
124 125 self._extra = []
125 126
126 127 def _calculate_index(self, i):
127 128 return i * indexsize
128 129
129 130 def __delitem__(self, i):
130 131 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
131 132 raise ValueError(b"deleting slices only supports a:-1 with step 1")
132 133 i = i.start
133 134 self._check_index(i)
134 135 self._stripnodes(i)
135 136 if i < self._lgt:
136 137 self._data = self._data[: i * indexsize]
137 138 self._lgt = i
138 139 self._extra = []
139 140 else:
140 141 self._extra = self._extra[: i - self._lgt]
141 142
142 143
143 144 class InlinedIndexObject(BaseIndexObject):
144 145 def __init__(self, data, inline=0):
145 146 self._data = data
146 147 self._lgt = self._inline_scan(None)
147 148 self._inline_scan(self._lgt)
148 149 self._extra = []
149 150
150 151 def _inline_scan(self, lgt):
151 152 off = 0
152 153 if lgt is not None:
153 154 self._offsets = [0] * lgt
154 155 count = 0
155 156 while off <= len(self._data) - indexsize:
156 157 (s,) = struct.unpack(
157 158 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
158 159 )
159 160 if lgt is not None:
160 161 self._offsets[count] = off
161 162 count += 1
162 163 off += indexsize + s
163 164 if off != len(self._data):
164 165 raise ValueError(b"corrupted data")
165 166 return count
166 167
167 168 def __delitem__(self, i):
168 169 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
169 170 raise ValueError(b"deleting slices only supports a:-1 with step 1")
170 171 i = i.start
171 172 self._check_index(i)
172 173 self._stripnodes(i)
173 174 if i < self._lgt:
174 175 self._offsets = self._offsets[:i]
175 176 self._lgt = i
176 177 self._extra = []
177 178 else:
178 179 self._extra = self._extra[: i - self._lgt]
179 180
180 181 def _calculate_index(self, i):
181 182 return self._offsets[i]
182 183
183 184
184 185 def parse_index2(data, inline):
185 186 if not inline:
186 187 return IndexObject(data), None
187 188 return InlinedIndexObject(data, inline), (0, data)
188 189
189 190
190 191 def parse_dirstate(dmap, copymap, st):
191 192 parents = [st[:20], st[20:40]]
192 193 # dereference fields so they will be local in loop
193 194 format = b">cllll"
194 195 e_size = struct.calcsize(format)
195 196 pos1 = 40
196 197 l = len(st)
197 198
198 199 # the inner loop
199 200 while pos1 < l:
200 201 pos2 = pos1 + e_size
201 202 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
202 203 pos1 = pos2 + e[4]
203 204 f = st[pos2:pos1]
204 205 if b'\0' in f:
205 206 f, c = f.split(b'\0')
206 207 copymap[f] = c
207 208 dmap[f] = e[:4]
208 209 return parents
209 210
210 211
211 212 def pack_dirstate(dmap, copymap, pl, now):
212 213 now = int(now)
213 214 cs = stringio()
214 215 write = cs.write
215 216 write(b"".join(pl))
216 217 for f, e in pycompat.iteritems(dmap):
217 218 if e[0] == b'n' and e[3] == now:
218 219 # The file was last modified "simultaneously" with the current
219 220 # write to dirstate (i.e. within the same second for file-
220 221 # systems with a granularity of 1 sec). This commonly happens
221 222 # for at least a couple of files on 'update'.
222 223 # The user could change the file without changing its size
223 224 # within the same second. Invalidate the file's mtime in
224 225 # dirstate, forcing future 'status' calls to compare the
225 226 # contents of the file if the size is the same. This prevents
226 227 # mistakenly treating such files as clean.
227 228 e = dirstatetuple(e[0], e[1], e[2], -1)
228 229 dmap[f] = e
229 230
230 231 if f in copymap:
231 232 f = b"%s\0%s" % (f, copymap[f])
232 233 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
233 234 write(e)
234 235 write(f)
235 236 return cs.getvalue()
@@ -1,2988 +1,2988
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 revlogutils,
69 68 templatefilters,
70 69 util,
71 70 )
72 71 from .interfaces import (
73 72 repository,
74 73 util as interfaceutil,
75 74 )
76 75 from .revlogutils import (
77 76 deltas as deltautil,
78 77 flagutil,
78 nodemap as nodemaputil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the name to prevent pyflakes constraints
87 87 # We need these name available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109 rustrevlog = policy.importrust('revlog')
110 110
111 111 # Aliased for performance.
112 112 _zlibdecompress = zlib.decompress
113 113
114 114 # max size of revlog with inline data
115 115 _maxinline = 131072
116 116 _chunksize = 1048576
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False, {}
121 121
122 122
123 123 def ellipsiswriteprocessor(rl, text, sidedata):
124 124 return text, False
125 125
126 126
127 127 def ellipsisrawprocessor(rl, text):
128 128 return False
129 129
130 130
131 131 ellipsisprocessor = (
132 132 ellipsisreadprocessor,
133 133 ellipsiswriteprocessor,
134 134 ellipsisrawprocessor,
135 135 )
136 136
137 137
138 138 def getoffset(q):
139 139 return int(q >> 16)
140 140
141 141
142 142 def gettype(q):
143 143 return int(q & 0xFFFF)
144 144
145 145
146 146 def offset_type(offset, type):
147 147 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 148 raise ValueError(b'unknown revlog index flags')
149 149 return int(int(offset) << 16 | type)
150 150
151 151
152 152 def _verify_revision(rl, skipflags, state, node):
153 153 """Verify the integrity of the given revlog ``node`` while providing a hook
154 154 point for extensions to influence the operation."""
155 155 if skipflags:
156 156 state[b'skipread'].add(node)
157 157 else:
158 158 # Side-effect: read content and verify hash.
159 159 rl.revision(node)
160 160
161 161
162 162 @attr.s(slots=True, frozen=True)
163 163 class _revisioninfo(object):
164 164 """Information about a revision that allows building its fulltext
165 165 node: expected hash of the revision
166 166 p1, p2: parent revs of the revision
167 167 btext: built text cache consisting of a one-element list
168 168 cachedelta: (baserev, uncompressed_delta) or None
169 169 flags: flags associated to the revision storage
170 170
171 171 One of btext[0] or cachedelta must be set.
172 172 """
173 173
174 174 node = attr.ib()
175 175 p1 = attr.ib()
176 176 p2 = attr.ib()
177 177 btext = attr.ib()
178 178 textlen = attr.ib()
179 179 cachedelta = attr.ib()
180 180 flags = attr.ib()
181 181
182 182
183 183 @interfaceutil.implementer(repository.irevisiondelta)
184 184 @attr.s(slots=True)
185 185 class revlogrevisiondelta(object):
186 186 node = attr.ib()
187 187 p1node = attr.ib()
188 188 p2node = attr.ib()
189 189 basenode = attr.ib()
190 190 flags = attr.ib()
191 191 baserevisionsize = attr.ib()
192 192 revision = attr.ib()
193 193 delta = attr.ib()
194 194 linknode = attr.ib(default=None)
195 195
196 196
197 197 @interfaceutil.implementer(repository.iverifyproblem)
198 198 @attr.s(frozen=True)
199 199 class revlogproblem(object):
200 200 warning = attr.ib(default=None)
201 201 error = attr.ib(default=None)
202 202 node = attr.ib(default=None)
203 203
204 204
205 205 # index v0:
206 206 # 4 bytes: offset
207 207 # 4 bytes: compressed length
208 208 # 4 bytes: base rev
209 209 # 4 bytes: link rev
210 210 # 20 bytes: parent 1 nodeid
211 211 # 20 bytes: parent 2 nodeid
212 212 # 20 bytes: nodeid
213 213 indexformatv0 = struct.Struct(b">4l20s20s20s")
214 214 indexformatv0_pack = indexformatv0.pack
215 215 indexformatv0_unpack = indexformatv0.unpack
216 216
217 217
218 218 class revlogoldindex(list):
219 219 @property
220 220 def nodemap(self):
221 221 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 222 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 223 return self._nodemap
224 224
225 225 @util.propertycache
226 226 def _nodemap(self):
227 nodemap = revlogutils.NodeMap({nullid: nullrev})
227 nodemap = nodemaputil.NodeMap({nullid: nullrev})
228 228 for r in range(0, len(self)):
229 229 n = self[r][7]
230 230 nodemap[n] = r
231 231 return nodemap
232 232
233 233 def has_node(self, node):
234 234 """return True if the node exist in the index"""
235 235 return node in self._nodemap
236 236
237 237 def rev(self, node):
238 238 """return a revision for a node
239 239
240 240 If the node is unknown, raise a RevlogError"""
241 241 return self._nodemap[node]
242 242
243 243 def get_rev(self, node):
244 244 """return a revision for a node
245 245
246 246 If the node is unknown, return None"""
247 247 return self._nodemap.get(node)
248 248
249 249 def append(self, tup):
250 250 self._nodemap[tup[7]] = len(self)
251 251 super(revlogoldindex, self).append(tup)
252 252
253 253 def __delitem__(self, i):
254 254 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 255 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 256 for r in pycompat.xrange(i.start, len(self)):
257 257 del self._nodemap[self[r][7]]
258 258 super(revlogoldindex, self).__delitem__(i)
259 259
260 260 def clearcaches(self):
261 261 self.__dict__.pop('_nodemap', None)
262 262
263 263 def __getitem__(self, i):
264 264 if i == -1:
265 265 return (0, 0, 0, -1, -1, -1, -1, nullid)
266 266 return list.__getitem__(self, i)
267 267
268 268
269 269 class revlogoldio(object):
270 270 def __init__(self):
271 271 self.size = indexformatv0.size
272 272
273 273 def parseindex(self, data, inline):
274 274 s = self.size
275 275 index = []
276 nodemap = revlogutils.NodeMap({nullid: nullrev})
276 nodemap = nodemaputil.NodeMap({nullid: nullrev})
277 277 n = off = 0
278 278 l = len(data)
279 279 while off + s <= l:
280 280 cur = data[off : off + s]
281 281 off += s
282 282 e = indexformatv0_unpack(cur)
283 283 # transform to revlogv1 format
284 284 e2 = (
285 285 offset_type(e[0], 0),
286 286 e[1],
287 287 -1,
288 288 e[2],
289 289 e[3],
290 290 nodemap.get(e[4], nullrev),
291 291 nodemap.get(e[5], nullrev),
292 292 e[6],
293 293 )
294 294 index.append(e2)
295 295 nodemap[e[6]] = n
296 296 n += 1
297 297
298 298 index = revlogoldindex(index)
299 299 return index, None
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 if gettype(entry[0]):
303 303 raise error.RevlogError(
304 304 _(b'index entry flags need revlog version 1')
305 305 )
306 306 e2 = (
307 307 getoffset(entry[0]),
308 308 entry[1],
309 309 entry[3],
310 310 entry[4],
311 311 node(entry[5]),
312 312 node(entry[6]),
313 313 entry[7],
314 314 )
315 315 return indexformatv0_pack(*e2)
316 316
317 317
318 318 # index ng:
319 319 # 6 bytes: offset
320 320 # 2 bytes: flags
321 321 # 4 bytes: compressed length
322 322 # 4 bytes: uncompressed length
323 323 # 4 bytes: base rev
324 324 # 4 bytes: link rev
325 325 # 4 bytes: parent 1 rev
326 326 # 4 bytes: parent 2 rev
327 327 # 32 bytes: nodeid
328 328 indexformatng = struct.Struct(b">Qiiiiii20s12x")
329 329 indexformatng_pack = indexformatng.pack
330 330 versionformat = struct.Struct(b">I")
331 331 versionformat_pack = versionformat.pack
332 332 versionformat_unpack = versionformat.unpack
333 333
334 334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
335 335 # signed integer)
336 336 _maxentrysize = 0x7FFFFFFF
337 337
338 338
339 339 class revlogio(object):
340 340 def __init__(self):
341 341 self.size = indexformatng.size
342 342
343 343 def parseindex(self, data, inline):
344 344 # call the C implementation to parse the index data
345 345 index, cache = parsers.parse_index2(data, inline)
346 346 return index, cache
347 347
348 348 def packentry(self, entry, node, version, rev):
349 349 p = indexformatng_pack(*entry)
350 350 if rev == 0:
351 351 p = versionformat_pack(version) + p[4:]
352 352 return p
353 353
354 354
355 355 class rustrevlogio(revlogio):
356 356 def parseindex(self, data, inline):
357 357 index, cache = super(rustrevlogio, self).parseindex(data, inline)
358 358 return rustrevlog.MixedIndex(index), cache
359 359
360 360
361 361 class revlog(object):
362 362 """
363 363 the underlying revision storage object
364 364
365 365 A revlog consists of two parts, an index and the revision data.
366 366
367 367 The index is a file with a fixed record size containing
368 368 information on each revision, including its nodeid (hash), the
369 369 nodeids of its parents, the position and offset of its data within
370 370 the data file, and the revision it's based on. Finally, each entry
371 371 contains a linkrev entry that can serve as a pointer to external
372 372 data.
373 373
374 374 The revision data itself is a linear collection of data chunks.
375 375 Each chunk represents a revision and is usually represented as a
376 376 delta against the previous chunk. To bound lookup time, runs of
377 377 deltas are limited to about 2 times the length of the original
378 378 version data. This makes retrieval of a version proportional to
379 379 its size, or O(1) relative to the number of revisions.
380 380
381 381 Both pieces of the revlog are written to in an append-only
382 382 fashion, which means we never need to rewrite a file to insert or
383 383 remove data, and can use some simple techniques to avoid the need
384 384 for locking while reading.
385 385
386 386 If checkambig, indexfile is opened with checkambig=True at
387 387 writing, to avoid file stat ambiguity.
388 388
389 389 If mmaplargeindex is True, and an mmapindexthreshold is set, the
390 390 index will be mmapped rather than read if it is larger than the
391 391 configured threshold.
392 392
393 393 If censorable is True, the revlog can have censored revisions.
394 394
395 395 If `upperboundcomp` is not None, this is the expected maximal gain from
396 396 compression for the data content.
397 397 """
398 398
399 399 _flagserrorclass = error.RevlogError
400 400
401 401 def __init__(
402 402 self,
403 403 opener,
404 404 indexfile,
405 405 datafile=None,
406 406 checkambig=False,
407 407 mmaplargeindex=False,
408 408 censorable=False,
409 409 upperboundcomp=None,
410 410 ):
411 411 """
412 412 create a revlog object
413 413
414 414 opener is a function that abstracts the file opening operation
415 415 and can be used to implement COW semantics or the like.
416 416
417 417 """
418 418 self.upperboundcomp = upperboundcomp
419 419 self.indexfile = indexfile
420 420 self.datafile = datafile or (indexfile[:-2] + b".d")
421 421 self.opener = opener
422 422 # When True, indexfile is opened with checkambig=True at writing, to
423 423 # avoid file stat ambiguity.
424 424 self._checkambig = checkambig
425 425 self._mmaplargeindex = mmaplargeindex
426 426 self._censorable = censorable
427 427 # 3-tuple of (node, rev, text) for a raw revision.
428 428 self._revisioncache = None
429 429 # Maps rev to chain base rev.
430 430 self._chainbasecache = util.lrucachedict(100)
431 431 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
432 432 self._chunkcache = (0, b'')
433 433 # How much data to read and cache into the raw revlog data cache.
434 434 self._chunkcachesize = 65536
435 435 self._maxchainlen = None
436 436 self._deltabothparents = True
437 437 self.index = None
438 438 # Mapping of partial identifiers to full nodes.
439 439 self._pcache = {}
440 440 # Mapping of revision integer to full node.
441 441 self._compengine = b'zlib'
442 442 self._compengineopts = {}
443 443 self._maxdeltachainspan = -1
444 444 self._withsparseread = False
445 445 self._sparserevlog = False
446 446 self._srdensitythreshold = 0.50
447 447 self._srmingapsize = 262144
448 448
449 449 # Make copy of flag processors so each revlog instance can support
450 450 # custom flags.
451 451 self._flagprocessors = dict(flagutil.flagprocessors)
452 452
453 453 # 2-tuple of file handles being used for active writing.
454 454 self._writinghandles = None
455 455
456 456 self._loadindex()
457 457
458 458 def _loadindex(self):
459 459 mmapindexthreshold = None
460 460 opts = self.opener.options
461 461
462 462 if b'revlogv2' in opts:
463 463 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
464 464 elif b'revlogv1' in opts:
465 465 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
466 466 if b'generaldelta' in opts:
467 467 newversionflags |= FLAG_GENERALDELTA
468 468 elif b'revlogv0' in self.opener.options:
469 469 newversionflags = REVLOGV0
470 470 else:
471 471 newversionflags = REVLOG_DEFAULT_VERSION
472 472
473 473 if b'chunkcachesize' in opts:
474 474 self._chunkcachesize = opts[b'chunkcachesize']
475 475 if b'maxchainlen' in opts:
476 476 self._maxchainlen = opts[b'maxchainlen']
477 477 if b'deltabothparents' in opts:
478 478 self._deltabothparents = opts[b'deltabothparents']
479 479 self._lazydelta = bool(opts.get(b'lazydelta', True))
480 480 self._lazydeltabase = False
481 481 if self._lazydelta:
482 482 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
483 483 if b'compengine' in opts:
484 484 self._compengine = opts[b'compengine']
485 485 if b'zlib.level' in opts:
486 486 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
487 487 if b'zstd.level' in opts:
488 488 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
489 489 if b'maxdeltachainspan' in opts:
490 490 self._maxdeltachainspan = opts[b'maxdeltachainspan']
491 491 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
492 492 mmapindexthreshold = opts[b'mmapindexthreshold']
493 493 self.hassidedata = bool(opts.get(b'side-data', False))
494 494 if self.hassidedata:
495 495 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
496 496 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
497 497 withsparseread = bool(opts.get(b'with-sparse-read', False))
498 498 # sparse-revlog forces sparse-read
499 499 self._withsparseread = self._sparserevlog or withsparseread
500 500 if b'sparse-read-density-threshold' in opts:
501 501 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
502 502 if b'sparse-read-min-gap-size' in opts:
503 503 self._srmingapsize = opts[b'sparse-read-min-gap-size']
504 504 if opts.get(b'enableellipsis'):
505 505 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
506 506
507 507 # revlog v0 doesn't have flag processors
508 508 for flag, processor in pycompat.iteritems(
509 509 opts.get(b'flagprocessors', {})
510 510 ):
511 511 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
512 512
513 513 if self._chunkcachesize <= 0:
514 514 raise error.RevlogError(
515 515 _(b'revlog chunk cache size %r is not greater than 0')
516 516 % self._chunkcachesize
517 517 )
518 518 elif self._chunkcachesize & (self._chunkcachesize - 1):
519 519 raise error.RevlogError(
520 520 _(b'revlog chunk cache size %r is not a power of 2')
521 521 % self._chunkcachesize
522 522 )
523 523
524 524 indexdata = b''
525 525 self._initempty = True
526 526 try:
527 527 with self._indexfp() as f:
528 528 if (
529 529 mmapindexthreshold is not None
530 530 and self.opener.fstat(f).st_size >= mmapindexthreshold
531 531 ):
532 532 # TODO: should .close() to release resources without
533 533 # relying on Python GC
534 534 indexdata = util.buffer(util.mmapread(f))
535 535 else:
536 536 indexdata = f.read()
537 537 if len(indexdata) > 0:
538 538 versionflags = versionformat_unpack(indexdata[:4])[0]
539 539 self._initempty = False
540 540 else:
541 541 versionflags = newversionflags
542 542 except IOError as inst:
543 543 if inst.errno != errno.ENOENT:
544 544 raise
545 545
546 546 versionflags = newversionflags
547 547
548 548 self.version = versionflags
549 549
550 550 flags = versionflags & ~0xFFFF
551 551 fmt = versionflags & 0xFFFF
552 552
553 553 if fmt == REVLOGV0:
554 554 if flags:
555 555 raise error.RevlogError(
556 556 _(b'unknown flags (%#04x) in version %d revlog %s')
557 557 % (flags >> 16, fmt, self.indexfile)
558 558 )
559 559
560 560 self._inline = False
561 561 self._generaldelta = False
562 562
563 563 elif fmt == REVLOGV1:
564 564 if flags & ~REVLOGV1_FLAGS:
565 565 raise error.RevlogError(
566 566 _(b'unknown flags (%#04x) in version %d revlog %s')
567 567 % (flags >> 16, fmt, self.indexfile)
568 568 )
569 569
570 570 self._inline = versionflags & FLAG_INLINE_DATA
571 571 self._generaldelta = versionflags & FLAG_GENERALDELTA
572 572
573 573 elif fmt == REVLOGV2:
574 574 if flags & ~REVLOGV2_FLAGS:
575 575 raise error.RevlogError(
576 576 _(b'unknown flags (%#04x) in version %d revlog %s')
577 577 % (flags >> 16, fmt, self.indexfile)
578 578 )
579 579
580 580 self._inline = versionflags & FLAG_INLINE_DATA
581 581 # generaldelta implied by version 2 revlogs.
582 582 self._generaldelta = True
583 583
584 584 else:
585 585 raise error.RevlogError(
586 586 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
587 587 )
588 588 # sparse-revlog can't be on without general-delta (issue6056)
589 589 if not self._generaldelta:
590 590 self._sparserevlog = False
591 591
592 592 self._storedeltachains = True
593 593
594 594 self._io = revlogio()
595 595 if rustrevlog is not None and self.opener.options.get('rust.index'):
596 596 self._io = rustrevlogio()
597 597 if self.version == REVLOGV0:
598 598 self._io = revlogoldio()
599 599 try:
600 600 d = self._io.parseindex(indexdata, self._inline)
601 601 except (ValueError, IndexError):
602 602 raise error.RevlogError(
603 603 _(b"index %s is corrupted") % self.indexfile
604 604 )
605 605 self.index, self._chunkcache = d
606 606 if not self._chunkcache:
607 607 self._chunkclear()
608 608 # revnum -> (chain-length, sum-delta-length)
609 609 self._chaininfocache = {}
610 610 # revlog header -> revlog compressor
611 611 self._decompressors = {}
612 612
613 613 @util.propertycache
614 614 def _compressor(self):
615 615 engine = util.compengines[self._compengine]
616 616 return engine.revlogcompressor(self._compengineopts)
617 617
618 618 def _indexfp(self, mode=b'r'):
619 619 """file object for the revlog's index file"""
620 620 args = {'mode': mode}
621 621 if mode != b'r':
622 622 args['checkambig'] = self._checkambig
623 623 if mode == b'w':
624 624 args['atomictemp'] = True
625 625 return self.opener(self.indexfile, **args)
626 626
627 627 def _datafp(self, mode=b'r'):
628 628 """file object for the revlog's data file"""
629 629 return self.opener(self.datafile, mode=mode)
630 630
631 631 @contextlib.contextmanager
632 632 def _datareadfp(self, existingfp=None):
633 633 """file object suitable to read data"""
634 634 # Use explicit file handle, if given.
635 635 if existingfp is not None:
636 636 yield existingfp
637 637
638 638 # Use a file handle being actively used for writes, if available.
639 639 # There is some danger to doing this because reads will seek the
640 640 # file. However, _writeentry() performs a SEEK_END before all writes,
641 641 # so we should be safe.
642 642 elif self._writinghandles:
643 643 if self._inline:
644 644 yield self._writinghandles[0]
645 645 else:
646 646 yield self._writinghandles[1]
647 647
648 648 # Otherwise open a new file handle.
649 649 else:
650 650 if self._inline:
651 651 func = self._indexfp
652 652 else:
653 653 func = self._datafp
654 654 with func() as fp:
655 655 yield fp
656 656
657 657 def tiprev(self):
658 658 return len(self.index) - 1
659 659
660 660 def tip(self):
661 661 return self.node(self.tiprev())
662 662
663 663 def __contains__(self, rev):
664 664 return 0 <= rev < len(self)
665 665
666 666 def __len__(self):
667 667 return len(self.index)
668 668
669 669 def __iter__(self):
670 670 return iter(pycompat.xrange(len(self)))
671 671
672 672 def revs(self, start=0, stop=None):
673 673 """iterate over all rev in this revlog (from start to stop)"""
674 674 return storageutil.iterrevs(len(self), start=start, stop=stop)
675 675
676 676 @property
677 677 def nodemap(self):
678 678 msg = (
679 679 b"revlog.nodemap is deprecated, "
680 680 b"use revlog.index.[has_node|rev|get_rev]"
681 681 )
682 682 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
683 683 return self.index.nodemap
684 684
685 685 @property
686 686 def _nodecache(self):
687 687 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
688 688 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
689 689 return self.index.nodemap
690 690
691 691 def hasnode(self, node):
692 692 try:
693 693 self.rev(node)
694 694 return True
695 695 except KeyError:
696 696 return False
697 697
698 698 def candelta(self, baserev, rev):
699 699 """whether two revisions (baserev, rev) can be delta-ed or not"""
700 700 # Disable delta if either rev requires a content-changing flag
701 701 # processor (ex. LFS). This is because such flag processor can alter
702 702 # the rawtext content that the delta will be based on, and two clients
703 703 # could have a same revlog node with different flags (i.e. different
704 704 # rawtext contents) and the delta could be incompatible.
705 705 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
706 706 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
707 707 ):
708 708 return False
709 709 return True
710 710
711 711 def clearcaches(self):
712 712 self._revisioncache = None
713 713 self._chainbasecache.clear()
714 714 self._chunkcache = (0, b'')
715 715 self._pcache = {}
716 716 self.index.clearcaches()
717 717
718 718 def rev(self, node):
719 719 try:
720 720 return self.index.rev(node)
721 721 except TypeError:
722 722 raise
723 723 except error.RevlogError:
724 724 # parsers.c radix tree lookup failed
725 725 if node == wdirid or node in wdirfilenodeids:
726 726 raise error.WdirUnsupported
727 727 raise error.LookupError(node, self.indexfile, _(b'no node'))
728 728
729 729 # Accessors for index entries.
730 730
731 731 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
732 732 # are flags.
733 733 def start(self, rev):
734 734 return int(self.index[rev][0] >> 16)
735 735
736 736 def flags(self, rev):
737 737 return self.index[rev][0] & 0xFFFF
738 738
739 739 def length(self, rev):
740 740 return self.index[rev][1]
741 741
742 742 def rawsize(self, rev):
743 743 """return the length of the uncompressed text for a given revision"""
744 744 l = self.index[rev][2]
745 745 if l >= 0:
746 746 return l
747 747
748 748 t = self.rawdata(rev)
749 749 return len(t)
750 750
751 751 def size(self, rev):
752 752 """length of non-raw text (processed by a "read" flag processor)"""
753 753 # fast path: if no "read" flag processor could change the content,
754 754 # size is rawsize. note: ELLIPSIS is known to not change the content.
755 755 flags = self.flags(rev)
756 756 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
757 757 return self.rawsize(rev)
758 758
759 759 return len(self.revision(rev, raw=False))
760 760
761 761 def chainbase(self, rev):
762 762 base = self._chainbasecache.get(rev)
763 763 if base is not None:
764 764 return base
765 765
766 766 index = self.index
767 767 iterrev = rev
768 768 base = index[iterrev][3]
769 769 while base != iterrev:
770 770 iterrev = base
771 771 base = index[iterrev][3]
772 772
773 773 self._chainbasecache[rev] = base
774 774 return base
775 775
776 776 def linkrev(self, rev):
777 777 return self.index[rev][4]
778 778
779 779 def parentrevs(self, rev):
780 780 try:
781 781 entry = self.index[rev]
782 782 except IndexError:
783 783 if rev == wdirrev:
784 784 raise error.WdirUnsupported
785 785 raise
786 786
787 787 return entry[5], entry[6]
788 788
789 789 # fast parentrevs(rev) where rev isn't filtered
790 790 _uncheckedparentrevs = parentrevs
791 791
792 792 def node(self, rev):
793 793 try:
794 794 return self.index[rev][7]
795 795 except IndexError:
796 796 if rev == wdirrev:
797 797 raise error.WdirUnsupported
798 798 raise
799 799
800 800 # Derived from index values.
801 801
802 802 def end(self, rev):
803 803 return self.start(rev) + self.length(rev)
804 804
805 805 def parents(self, node):
806 806 i = self.index
807 807 d = i[self.rev(node)]
808 808 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
809 809
810 810 def chainlen(self, rev):
811 811 return self._chaininfo(rev)[0]
812 812
813 813 def _chaininfo(self, rev):
814 814 chaininfocache = self._chaininfocache
815 815 if rev in chaininfocache:
816 816 return chaininfocache[rev]
817 817 index = self.index
818 818 generaldelta = self._generaldelta
819 819 iterrev = rev
820 820 e = index[iterrev]
821 821 clen = 0
822 822 compresseddeltalen = 0
823 823 while iterrev != e[3]:
824 824 clen += 1
825 825 compresseddeltalen += e[1]
826 826 if generaldelta:
827 827 iterrev = e[3]
828 828 else:
829 829 iterrev -= 1
830 830 if iterrev in chaininfocache:
831 831 t = chaininfocache[iterrev]
832 832 clen += t[0]
833 833 compresseddeltalen += t[1]
834 834 break
835 835 e = index[iterrev]
836 836 else:
837 837 # Add text length of base since decompressing that also takes
838 838 # work. For cache hits the length is already included.
839 839 compresseddeltalen += e[1]
840 840 r = (clen, compresseddeltalen)
841 841 chaininfocache[rev] = r
842 842 return r
843 843
844 844 def _deltachain(self, rev, stoprev=None):
845 845 """Obtain the delta chain for a revision.
846 846
847 847 ``stoprev`` specifies a revision to stop at. If not specified, we
848 848 stop at the base of the chain.
849 849
850 850 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
851 851 revs in ascending order and ``stopped`` is a bool indicating whether
852 852 ``stoprev`` was hit.
853 853 """
854 854 # Try C implementation.
855 855 try:
856 856 return self.index.deltachain(rev, stoprev, self._generaldelta)
857 857 except AttributeError:
858 858 pass
859 859
860 860 chain = []
861 861
862 862 # Alias to prevent attribute lookup in tight loop.
863 863 index = self.index
864 864 generaldelta = self._generaldelta
865 865
866 866 iterrev = rev
867 867 e = index[iterrev]
868 868 while iterrev != e[3] and iterrev != stoprev:
869 869 chain.append(iterrev)
870 870 if generaldelta:
871 871 iterrev = e[3]
872 872 else:
873 873 iterrev -= 1
874 874 e = index[iterrev]
875 875
876 876 if iterrev == stoprev:
877 877 stopped = True
878 878 else:
879 879 chain.append(iterrev)
880 880 stopped = False
881 881
882 882 chain.reverse()
883 883 return chain, stopped
884 884
885 885 def ancestors(self, revs, stoprev=0, inclusive=False):
886 886 """Generate the ancestors of 'revs' in reverse revision order.
887 887 Does not generate revs lower than stoprev.
888 888
889 889 See the documentation for ancestor.lazyancestors for more details."""
890 890
891 891 # first, make sure start revisions aren't filtered
892 892 revs = list(revs)
893 893 checkrev = self.node
894 894 for r in revs:
895 895 checkrev(r)
896 896 # and we're sure ancestors aren't filtered as well
897 897
898 898 if rustancestor is not None:
899 899 lazyancestors = rustancestor.LazyAncestors
900 900 arg = self.index
901 901 elif util.safehasattr(parsers, b'rustlazyancestors'):
902 902 lazyancestors = ancestor.rustlazyancestors
903 903 arg = self.index
904 904 else:
905 905 lazyancestors = ancestor.lazyancestors
906 906 arg = self._uncheckedparentrevs
907 907 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
908 908
909 909 def descendants(self, revs):
910 910 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
911 911
912 912 def findcommonmissing(self, common=None, heads=None):
913 913 """Return a tuple of the ancestors of common and the ancestors of heads
914 914 that are not ancestors of common. In revset terminology, we return the
915 915 tuple:
916 916
917 917 ::common, (::heads) - (::common)
918 918
919 919 The list is sorted by revision number, meaning it is
920 920 topologically sorted.
921 921
922 922 'heads' and 'common' are both lists of node IDs. If heads is
923 923 not supplied, uses all of the revlog's heads. If common is not
924 924 supplied, uses nullid."""
925 925 if common is None:
926 926 common = [nullid]
927 927 if heads is None:
928 928 heads = self.heads()
929 929
930 930 common = [self.rev(n) for n in common]
931 931 heads = [self.rev(n) for n in heads]
932 932
933 933 # we want the ancestors, but inclusive
934 934 class lazyset(object):
935 935 def __init__(self, lazyvalues):
936 936 self.addedvalues = set()
937 937 self.lazyvalues = lazyvalues
938 938
939 939 def __contains__(self, value):
940 940 return value in self.addedvalues or value in self.lazyvalues
941 941
942 942 def __iter__(self):
943 943 added = self.addedvalues
944 944 for r in added:
945 945 yield r
946 946 for r in self.lazyvalues:
947 947 if not r in added:
948 948 yield r
949 949
950 950 def add(self, value):
951 951 self.addedvalues.add(value)
952 952
953 953 def update(self, values):
954 954 self.addedvalues.update(values)
955 955
956 956 has = lazyset(self.ancestors(common))
957 957 has.add(nullrev)
958 958 has.update(common)
959 959
960 960 # take all ancestors from heads that aren't in has
961 961 missing = set()
962 962 visit = collections.deque(r for r in heads if r not in has)
963 963 while visit:
964 964 r = visit.popleft()
965 965 if r in missing:
966 966 continue
967 967 else:
968 968 missing.add(r)
969 969 for p in self.parentrevs(r):
970 970 if p not in has:
971 971 visit.append(p)
972 972 missing = list(missing)
973 973 missing.sort()
974 974 return has, [self.node(miss) for miss in missing]
975 975
976 976 def incrementalmissingrevs(self, common=None):
977 977 """Return an object that can be used to incrementally compute the
978 978 revision numbers of the ancestors of arbitrary sets that are not
979 979 ancestors of common. This is an ancestor.incrementalmissingancestors
980 980 object.
981 981
982 982 'common' is a list of revision numbers. If common is not supplied, uses
983 983 nullrev.
984 984 """
985 985 if common is None:
986 986 common = [nullrev]
987 987
988 988 if rustancestor is not None:
989 989 return rustancestor.MissingAncestors(self.index, common)
990 990 return ancestor.incrementalmissingancestors(self.parentrevs, common)
991 991
992 992 def findmissingrevs(self, common=None, heads=None):
993 993 """Return the revision numbers of the ancestors of heads that
994 994 are not ancestors of common.
995 995
996 996 More specifically, return a list of revision numbers corresponding to
997 997 nodes N such that every N satisfies the following constraints:
998 998
999 999 1. N is an ancestor of some node in 'heads'
1000 1000 2. N is not an ancestor of any node in 'common'
1001 1001
1002 1002 The list is sorted by revision number, meaning it is
1003 1003 topologically sorted.
1004 1004
1005 1005 'heads' and 'common' are both lists of revision numbers. If heads is
1006 1006 not supplied, uses all of the revlog's heads. If common is not
1007 1007 supplied, uses nullid."""
1008 1008 if common is None:
1009 1009 common = [nullrev]
1010 1010 if heads is None:
1011 1011 heads = self.headrevs()
1012 1012
1013 1013 inc = self.incrementalmissingrevs(common=common)
1014 1014 return inc.missingancestors(heads)
1015 1015
1016 1016 def findmissing(self, common=None, heads=None):
1017 1017 """Return the ancestors of heads that are not ancestors of common.
1018 1018
1019 1019 More specifically, return a list of nodes N such that every N
1020 1020 satisfies the following constraints:
1021 1021
1022 1022 1. N is an ancestor of some node in 'heads'
1023 1023 2. N is not an ancestor of any node in 'common'
1024 1024
1025 1025 The list is sorted by revision number, meaning it is
1026 1026 topologically sorted.
1027 1027
1028 1028 'heads' and 'common' are both lists of node IDs. If heads is
1029 1029 not supplied, uses all of the revlog's heads. If common is not
1030 1030 supplied, uses nullid."""
1031 1031 if common is None:
1032 1032 common = [nullid]
1033 1033 if heads is None:
1034 1034 heads = self.heads()
1035 1035
1036 1036 common = [self.rev(n) for n in common]
1037 1037 heads = [self.rev(n) for n in heads]
1038 1038
1039 1039 inc = self.incrementalmissingrevs(common=common)
1040 1040 return [self.node(r) for r in inc.missingancestors(heads)]
1041 1041
1042 1042 def nodesbetween(self, roots=None, heads=None):
1043 1043 """Return a topological path from 'roots' to 'heads'.
1044 1044
1045 1045 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1046 1046 topologically sorted list of all nodes N that satisfy both of
1047 1047 these constraints:
1048 1048
1049 1049 1. N is a descendant of some node in 'roots'
1050 1050 2. N is an ancestor of some node in 'heads'
1051 1051
1052 1052 Every node is considered to be both a descendant and an ancestor
1053 1053 of itself, so every reachable node in 'roots' and 'heads' will be
1054 1054 included in 'nodes'.
1055 1055
1056 1056 'outroots' is the list of reachable nodes in 'roots', i.e., the
1057 1057 subset of 'roots' that is returned in 'nodes'. Likewise,
1058 1058 'outheads' is the subset of 'heads' that is also in 'nodes'.
1059 1059
1060 1060 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1061 1061 unspecified, uses nullid as the only root. If 'heads' is
1062 1062 unspecified, uses list of all of the revlog's heads."""
1063 1063 nonodes = ([], [], [])
1064 1064 if roots is not None:
1065 1065 roots = list(roots)
1066 1066 if not roots:
1067 1067 return nonodes
1068 1068 lowestrev = min([self.rev(n) for n in roots])
1069 1069 else:
1070 1070 roots = [nullid] # Everybody's a descendant of nullid
1071 1071 lowestrev = nullrev
1072 1072 if (lowestrev == nullrev) and (heads is None):
1073 1073 # We want _all_ the nodes!
1074 1074 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1075 1075 if heads is None:
1076 1076 # All nodes are ancestors, so the latest ancestor is the last
1077 1077 # node.
1078 1078 highestrev = len(self) - 1
1079 1079 # Set ancestors to None to signal that every node is an ancestor.
1080 1080 ancestors = None
1081 1081 # Set heads to an empty dictionary for later discovery of heads
1082 1082 heads = {}
1083 1083 else:
1084 1084 heads = list(heads)
1085 1085 if not heads:
1086 1086 return nonodes
1087 1087 ancestors = set()
1088 1088 # Turn heads into a dictionary so we can remove 'fake' heads.
1089 1089 # Also, later we will be using it to filter out the heads we can't
1090 1090 # find from roots.
1091 1091 heads = dict.fromkeys(heads, False)
1092 1092 # Start at the top and keep marking parents until we're done.
1093 1093 nodestotag = set(heads)
1094 1094 # Remember where the top was so we can use it as a limit later.
1095 1095 highestrev = max([self.rev(n) for n in nodestotag])
1096 1096 while nodestotag:
1097 1097 # grab a node to tag
1098 1098 n = nodestotag.pop()
1099 1099 # Never tag nullid
1100 1100 if n == nullid:
1101 1101 continue
1102 1102 # A node's revision number represents its place in a
1103 1103 # topologically sorted list of nodes.
1104 1104 r = self.rev(n)
1105 1105 if r >= lowestrev:
1106 1106 if n not in ancestors:
1107 1107 # If we are possibly a descendant of one of the roots
1108 1108 # and we haven't already been marked as an ancestor
1109 1109 ancestors.add(n) # Mark as ancestor
1110 1110 # Add non-nullid parents to list of nodes to tag.
1111 1111 nodestotag.update(
1112 1112 [p for p in self.parents(n) if p != nullid]
1113 1113 )
1114 1114 elif n in heads: # We've seen it before, is it a fake head?
1115 1115 # So it is, real heads should not be the ancestors of
1116 1116 # any other heads.
1117 1117 heads.pop(n)
1118 1118 if not ancestors:
1119 1119 return nonodes
1120 1120 # Now that we have our set of ancestors, we want to remove any
1121 1121 # roots that are not ancestors.
1122 1122
1123 1123 # If one of the roots was nullid, everything is included anyway.
1124 1124 if lowestrev > nullrev:
1125 1125 # But, since we weren't, let's recompute the lowest rev to not
1126 1126 # include roots that aren't ancestors.
1127 1127
1128 1128 # Filter out roots that aren't ancestors of heads
1129 1129 roots = [root for root in roots if root in ancestors]
1130 1130 # Recompute the lowest revision
1131 1131 if roots:
1132 1132 lowestrev = min([self.rev(root) for root in roots])
1133 1133 else:
1134 1134 # No more roots? Return empty list
1135 1135 return nonodes
1136 1136 else:
1137 1137 # We are descending from nullid, and don't need to care about
1138 1138 # any other roots.
1139 1139 lowestrev = nullrev
1140 1140 roots = [nullid]
1141 1141 # Transform our roots list into a set.
1142 1142 descendants = set(roots)
1143 1143 # Also, keep the original roots so we can filter out roots that aren't
1144 1144 # 'real' roots (i.e. are descended from other roots).
1145 1145 roots = descendants.copy()
1146 1146 # Our topologically sorted list of output nodes.
1147 1147 orderedout = []
1148 1148 # Don't start at nullid since we don't want nullid in our output list,
1149 1149 # and if nullid shows up in descendants, empty parents will look like
1150 1150 # they're descendants.
1151 1151 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1152 1152 n = self.node(r)
1153 1153 isdescendant = False
1154 1154 if lowestrev == nullrev: # Everybody is a descendant of nullid
1155 1155 isdescendant = True
1156 1156 elif n in descendants:
1157 1157 # n is already a descendant
1158 1158 isdescendant = True
1159 1159 # This check only needs to be done here because all the roots
1160 1160 # will start being marked is descendants before the loop.
1161 1161 if n in roots:
1162 1162 # If n was a root, check if it's a 'real' root.
1163 1163 p = tuple(self.parents(n))
1164 1164 # If any of its parents are descendants, it's not a root.
1165 1165 if (p[0] in descendants) or (p[1] in descendants):
1166 1166 roots.remove(n)
1167 1167 else:
1168 1168 p = tuple(self.parents(n))
1169 1169 # A node is a descendant if either of its parents are
1170 1170 # descendants. (We seeded the dependents list with the roots
1171 1171 # up there, remember?)
1172 1172 if (p[0] in descendants) or (p[1] in descendants):
1173 1173 descendants.add(n)
1174 1174 isdescendant = True
1175 1175 if isdescendant and ((ancestors is None) or (n in ancestors)):
1176 1176 # Only include nodes that are both descendants and ancestors.
1177 1177 orderedout.append(n)
1178 1178 if (ancestors is not None) and (n in heads):
1179 1179 # We're trying to figure out which heads are reachable
1180 1180 # from roots.
1181 1181 # Mark this head as having been reached
1182 1182 heads[n] = True
1183 1183 elif ancestors is None:
1184 1184 # Otherwise, we're trying to discover the heads.
1185 1185 # Assume this is a head because if it isn't, the next step
1186 1186 # will eventually remove it.
1187 1187 heads[n] = True
1188 1188 # But, obviously its parents aren't.
1189 1189 for p in self.parents(n):
1190 1190 heads.pop(p, None)
1191 1191 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1192 1192 roots = list(roots)
1193 1193 assert orderedout
1194 1194 assert roots
1195 1195 assert heads
1196 1196 return (orderedout, roots, heads)
1197 1197
1198 1198 def headrevs(self, revs=None):
1199 1199 if revs is None:
1200 1200 try:
1201 1201 return self.index.headrevs()
1202 1202 except AttributeError:
1203 1203 return self._headrevs()
1204 1204 if rustdagop is not None:
1205 1205 return rustdagop.headrevs(self.index, revs)
1206 1206 return dagop.headrevs(revs, self._uncheckedparentrevs)
1207 1207
1208 1208 def computephases(self, roots):
1209 1209 return self.index.computephasesmapsets(roots)
1210 1210
1211 1211 def _headrevs(self):
1212 1212 count = len(self)
1213 1213 if not count:
1214 1214 return [nullrev]
1215 1215 # we won't iter over filtered rev so nobody is a head at start
1216 1216 ishead = [0] * (count + 1)
1217 1217 index = self.index
1218 1218 for r in self:
1219 1219 ishead[r] = 1 # I may be an head
1220 1220 e = index[r]
1221 1221 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1222 1222 return [r for r, val in enumerate(ishead) if val]
1223 1223
1224 1224 def heads(self, start=None, stop=None):
1225 1225 """return the list of all nodes that have no children
1226 1226
1227 1227 if start is specified, only heads that are descendants of
1228 1228 start will be returned
1229 1229 if stop is specified, it will consider all the revs from stop
1230 1230 as if they had no children
1231 1231 """
1232 1232 if start is None and stop is None:
1233 1233 if not len(self):
1234 1234 return [nullid]
1235 1235 return [self.node(r) for r in self.headrevs()]
1236 1236
1237 1237 if start is None:
1238 1238 start = nullrev
1239 1239 else:
1240 1240 start = self.rev(start)
1241 1241
1242 1242 stoprevs = set(self.rev(n) for n in stop or [])
1243 1243
1244 1244 revs = dagop.headrevssubset(
1245 1245 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1246 1246 )
1247 1247
1248 1248 return [self.node(rev) for rev in revs]
1249 1249
1250 1250 def children(self, node):
1251 1251 """find the children of a given node"""
1252 1252 c = []
1253 1253 p = self.rev(node)
1254 1254 for r in self.revs(start=p + 1):
1255 1255 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1256 1256 if prevs:
1257 1257 for pr in prevs:
1258 1258 if pr == p:
1259 1259 c.append(self.node(r))
1260 1260 elif p == nullrev:
1261 1261 c.append(self.node(r))
1262 1262 return c
1263 1263
1264 1264 def commonancestorsheads(self, a, b):
1265 1265 """calculate all the heads of the common ancestors of nodes a and b"""
1266 1266 a, b = self.rev(a), self.rev(b)
1267 1267 ancs = self._commonancestorsheads(a, b)
1268 1268 return pycompat.maplist(self.node, ancs)
1269 1269
1270 1270 def _commonancestorsheads(self, *revs):
1271 1271 """calculate all the heads of the common ancestors of revs"""
1272 1272 try:
1273 1273 ancs = self.index.commonancestorsheads(*revs)
1274 1274 except (AttributeError, OverflowError): # C implementation failed
1275 1275 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1276 1276 return ancs
1277 1277
1278 1278 def isancestor(self, a, b):
1279 1279 """return True if node a is an ancestor of node b
1280 1280
1281 1281 A revision is considered an ancestor of itself."""
1282 1282 a, b = self.rev(a), self.rev(b)
1283 1283 return self.isancestorrev(a, b)
1284 1284
1285 1285 def isancestorrev(self, a, b):
1286 1286 """return True if revision a is an ancestor of revision b
1287 1287
1288 1288 A revision is considered an ancestor of itself.
1289 1289
1290 1290 The implementation of this is trivial but the use of
1291 1291 reachableroots is not."""
1292 1292 if a == nullrev:
1293 1293 return True
1294 1294 elif a == b:
1295 1295 return True
1296 1296 elif a > b:
1297 1297 return False
1298 1298 return bool(self.reachableroots(a, [b], [a], includepath=False))
1299 1299
1300 1300 def reachableroots(self, minroot, heads, roots, includepath=False):
1301 1301 """return (heads(::(<roots> and <roots>::<heads>)))
1302 1302
1303 1303 If includepath is True, return (<roots>::<heads>)."""
1304 1304 try:
1305 1305 return self.index.reachableroots2(
1306 1306 minroot, heads, roots, includepath
1307 1307 )
1308 1308 except AttributeError:
1309 1309 return dagop._reachablerootspure(
1310 1310 self.parentrevs, minroot, roots, heads, includepath
1311 1311 )
1312 1312
1313 1313 def ancestor(self, a, b):
1314 1314 """calculate the "best" common ancestor of nodes a and b"""
1315 1315
1316 1316 a, b = self.rev(a), self.rev(b)
1317 1317 try:
1318 1318 ancs = self.index.ancestors(a, b)
1319 1319 except (AttributeError, OverflowError):
1320 1320 ancs = ancestor.ancestors(self.parentrevs, a, b)
1321 1321 if ancs:
1322 1322 # choose a consistent winner when there's a tie
1323 1323 return min(map(self.node, ancs))
1324 1324 return nullid
1325 1325
1326 1326 def _match(self, id):
1327 1327 if isinstance(id, int):
1328 1328 # rev
1329 1329 return self.node(id)
1330 1330 if len(id) == 20:
1331 1331 # possibly a binary node
1332 1332 # odds of a binary node being all hex in ASCII are 1 in 10**25
1333 1333 try:
1334 1334 node = id
1335 1335 self.rev(node) # quick search the index
1336 1336 return node
1337 1337 except error.LookupError:
1338 1338 pass # may be partial hex id
1339 1339 try:
1340 1340 # str(rev)
1341 1341 rev = int(id)
1342 1342 if b"%d" % rev != id:
1343 1343 raise ValueError
1344 1344 if rev < 0:
1345 1345 rev = len(self) + rev
1346 1346 if rev < 0 or rev >= len(self):
1347 1347 raise ValueError
1348 1348 return self.node(rev)
1349 1349 except (ValueError, OverflowError):
1350 1350 pass
1351 1351 if len(id) == 40:
1352 1352 try:
1353 1353 # a full hex nodeid?
1354 1354 node = bin(id)
1355 1355 self.rev(node)
1356 1356 return node
1357 1357 except (TypeError, error.LookupError):
1358 1358 pass
1359 1359
1360 1360 def _partialmatch(self, id):
1361 1361 # we don't care wdirfilenodeids as they should be always full hash
1362 1362 maybewdir = wdirhex.startswith(id)
1363 1363 try:
1364 1364 partial = self.index.partialmatch(id)
1365 1365 if partial and self.hasnode(partial):
1366 1366 if maybewdir:
1367 1367 # single 'ff...' match in radix tree, ambiguous with wdir
1368 1368 raise error.RevlogError
1369 1369 return partial
1370 1370 if maybewdir:
1371 1371 # no 'ff...' match in radix tree, wdir identified
1372 1372 raise error.WdirUnsupported
1373 1373 return None
1374 1374 except error.RevlogError:
1375 1375 # parsers.c radix tree lookup gave multiple matches
1376 1376 # fast path: for unfiltered changelog, radix tree is accurate
1377 1377 if not getattr(self, 'filteredrevs', None):
1378 1378 raise error.AmbiguousPrefixLookupError(
1379 1379 id, self.indexfile, _(b'ambiguous identifier')
1380 1380 )
1381 1381 # fall through to slow path that filters hidden revisions
1382 1382 except (AttributeError, ValueError):
1383 1383 # we are pure python, or key was too short to search radix tree
1384 1384 pass
1385 1385
1386 1386 if id in self._pcache:
1387 1387 return self._pcache[id]
1388 1388
1389 1389 if len(id) <= 40:
1390 1390 try:
1391 1391 # hex(node)[:...]
1392 1392 l = len(id) // 2 # grab an even number of digits
1393 1393 prefix = bin(id[: l * 2])
1394 1394 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1395 1395 nl = [
1396 1396 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1397 1397 ]
1398 1398 if nullhex.startswith(id):
1399 1399 nl.append(nullid)
1400 1400 if len(nl) > 0:
1401 1401 if len(nl) == 1 and not maybewdir:
1402 1402 self._pcache[id] = nl[0]
1403 1403 return nl[0]
1404 1404 raise error.AmbiguousPrefixLookupError(
1405 1405 id, self.indexfile, _(b'ambiguous identifier')
1406 1406 )
1407 1407 if maybewdir:
1408 1408 raise error.WdirUnsupported
1409 1409 return None
1410 1410 except TypeError:
1411 1411 pass
1412 1412
1413 1413 def lookup(self, id):
1414 1414 """locate a node based on:
1415 1415 - revision number or str(revision number)
1416 1416 - nodeid or subset of hex nodeid
1417 1417 """
1418 1418 n = self._match(id)
1419 1419 if n is not None:
1420 1420 return n
1421 1421 n = self._partialmatch(id)
1422 1422 if n:
1423 1423 return n
1424 1424
1425 1425 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1426 1426
1427 1427 def shortest(self, node, minlength=1):
1428 1428 """Find the shortest unambiguous prefix that matches node."""
1429 1429
1430 1430 def isvalid(prefix):
1431 1431 try:
1432 1432 matchednode = self._partialmatch(prefix)
1433 1433 except error.AmbiguousPrefixLookupError:
1434 1434 return False
1435 1435 except error.WdirUnsupported:
1436 1436 # single 'ff...' match
1437 1437 return True
1438 1438 if matchednode is None:
1439 1439 raise error.LookupError(node, self.indexfile, _(b'no node'))
1440 1440 return True
1441 1441
1442 1442 def maybewdir(prefix):
1443 1443 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1444 1444
1445 1445 hexnode = hex(node)
1446 1446
1447 1447 def disambiguate(hexnode, minlength):
1448 1448 """Disambiguate against wdirid."""
1449 1449 for length in range(minlength, 41):
1450 1450 prefix = hexnode[:length]
1451 1451 if not maybewdir(prefix):
1452 1452 return prefix
1453 1453
1454 1454 if not getattr(self, 'filteredrevs', None):
1455 1455 try:
1456 1456 length = max(self.index.shortest(node), minlength)
1457 1457 return disambiguate(hexnode, length)
1458 1458 except error.RevlogError:
1459 1459 if node != wdirid:
1460 1460 raise error.LookupError(node, self.indexfile, _(b'no node'))
1461 1461 except AttributeError:
1462 1462 # Fall through to pure code
1463 1463 pass
1464 1464
1465 1465 if node == wdirid:
1466 1466 for length in range(minlength, 41):
1467 1467 prefix = hexnode[:length]
1468 1468 if isvalid(prefix):
1469 1469 return prefix
1470 1470
1471 1471 for length in range(minlength, 41):
1472 1472 prefix = hexnode[:length]
1473 1473 if isvalid(prefix):
1474 1474 return disambiguate(hexnode, length)
1475 1475
1476 1476 def cmp(self, node, text):
1477 1477 """compare text with a given file revision
1478 1478
1479 1479 returns True if text is different than what is stored.
1480 1480 """
1481 1481 p1, p2 = self.parents(node)
1482 1482 return storageutil.hashrevisionsha1(text, p1, p2) != node
1483 1483
1484 1484 def _cachesegment(self, offset, data):
1485 1485 """Add a segment to the revlog cache.
1486 1486
1487 1487 Accepts an absolute offset and the data that is at that location.
1488 1488 """
1489 1489 o, d = self._chunkcache
1490 1490 # try to add to existing cache
1491 1491 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1492 1492 self._chunkcache = o, d + data
1493 1493 else:
1494 1494 self._chunkcache = offset, data
1495 1495
1496 1496 def _readsegment(self, offset, length, df=None):
1497 1497 """Load a segment of raw data from the revlog.
1498 1498
1499 1499 Accepts an absolute offset, length to read, and an optional existing
1500 1500 file handle to read from.
1501 1501
1502 1502 If an existing file handle is passed, it will be seeked and the
1503 1503 original seek position will NOT be restored.
1504 1504
1505 1505 Returns a str or buffer of raw byte data.
1506 1506
1507 1507 Raises if the requested number of bytes could not be read.
1508 1508 """
1509 1509 # Cache data both forward and backward around the requested
1510 1510 # data, in a fixed size window. This helps speed up operations
1511 1511 # involving reading the revlog backwards.
1512 1512 cachesize = self._chunkcachesize
1513 1513 realoffset = offset & ~(cachesize - 1)
1514 1514 reallength = (
1515 1515 (offset + length + cachesize) & ~(cachesize - 1)
1516 1516 ) - realoffset
1517 1517 with self._datareadfp(df) as df:
1518 1518 df.seek(realoffset)
1519 1519 d = df.read(reallength)
1520 1520
1521 1521 self._cachesegment(realoffset, d)
1522 1522 if offset != realoffset or reallength != length:
1523 1523 startoffset = offset - realoffset
1524 1524 if len(d) - startoffset < length:
1525 1525 raise error.RevlogError(
1526 1526 _(
1527 1527 b'partial read of revlog %s; expected %d bytes from '
1528 1528 b'offset %d, got %d'
1529 1529 )
1530 1530 % (
1531 1531 self.indexfile if self._inline else self.datafile,
1532 1532 length,
1533 1533 realoffset,
1534 1534 len(d) - startoffset,
1535 1535 )
1536 1536 )
1537 1537
1538 1538 return util.buffer(d, startoffset, length)
1539 1539
1540 1540 if len(d) < length:
1541 1541 raise error.RevlogError(
1542 1542 _(
1543 1543 b'partial read of revlog %s; expected %d bytes from offset '
1544 1544 b'%d, got %d'
1545 1545 )
1546 1546 % (
1547 1547 self.indexfile if self._inline else self.datafile,
1548 1548 length,
1549 1549 offset,
1550 1550 len(d),
1551 1551 )
1552 1552 )
1553 1553
1554 1554 return d
1555 1555
1556 1556 def _getsegment(self, offset, length, df=None):
1557 1557 """Obtain a segment of raw data from the revlog.
1558 1558
1559 1559 Accepts an absolute offset, length of bytes to obtain, and an
1560 1560 optional file handle to the already-opened revlog. If the file
1561 1561 handle is used, it's original seek position will not be preserved.
1562 1562
1563 1563 Requests for data may be returned from a cache.
1564 1564
1565 1565 Returns a str or a buffer instance of raw byte data.
1566 1566 """
1567 1567 o, d = self._chunkcache
1568 1568 l = len(d)
1569 1569
1570 1570 # is it in the cache?
1571 1571 cachestart = offset - o
1572 1572 cacheend = cachestart + length
1573 1573 if cachestart >= 0 and cacheend <= l:
1574 1574 if cachestart == 0 and cacheend == l:
1575 1575 return d # avoid a copy
1576 1576 return util.buffer(d, cachestart, cacheend - cachestart)
1577 1577
1578 1578 return self._readsegment(offset, length, df=df)
1579 1579
1580 1580 def _getsegmentforrevs(self, startrev, endrev, df=None):
1581 1581 """Obtain a segment of raw data corresponding to a range of revisions.
1582 1582
1583 1583 Accepts the start and end revisions and an optional already-open
1584 1584 file handle to be used for reading. If the file handle is read, its
1585 1585 seek position will not be preserved.
1586 1586
1587 1587 Requests for data may be satisfied by a cache.
1588 1588
1589 1589 Returns a 2-tuple of (offset, data) for the requested range of
1590 1590 revisions. Offset is the integer offset from the beginning of the
1591 1591 revlog and data is a str or buffer of the raw byte data.
1592 1592
1593 1593 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1594 1594 to determine where each revision's data begins and ends.
1595 1595 """
1596 1596 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1597 1597 # (functions are expensive).
1598 1598 index = self.index
1599 1599 istart = index[startrev]
1600 1600 start = int(istart[0] >> 16)
1601 1601 if startrev == endrev:
1602 1602 end = start + istart[1]
1603 1603 else:
1604 1604 iend = index[endrev]
1605 1605 end = int(iend[0] >> 16) + iend[1]
1606 1606
1607 1607 if self._inline:
1608 1608 start += (startrev + 1) * self._io.size
1609 1609 end += (endrev + 1) * self._io.size
1610 1610 length = end - start
1611 1611
1612 1612 return start, self._getsegment(start, length, df=df)
1613 1613
1614 1614 def _chunk(self, rev, df=None):
1615 1615 """Obtain a single decompressed chunk for a revision.
1616 1616
1617 1617 Accepts an integer revision and an optional already-open file handle
1618 1618 to be used for reading. If used, the seek position of the file will not
1619 1619 be preserved.
1620 1620
1621 1621 Returns a str holding uncompressed data for the requested revision.
1622 1622 """
1623 1623 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1624 1624
1625 1625 def _chunks(self, revs, df=None, targetsize=None):
1626 1626 """Obtain decompressed chunks for the specified revisions.
1627 1627
1628 1628 Accepts an iterable of numeric revisions that are assumed to be in
1629 1629 ascending order. Also accepts an optional already-open file handle
1630 1630 to be used for reading. If used, the seek position of the file will
1631 1631 not be preserved.
1632 1632
1633 1633 This function is similar to calling ``self._chunk()`` multiple times,
1634 1634 but is faster.
1635 1635
1636 1636 Returns a list with decompressed data for each requested revision.
1637 1637 """
1638 1638 if not revs:
1639 1639 return []
1640 1640 start = self.start
1641 1641 length = self.length
1642 1642 inline = self._inline
1643 1643 iosize = self._io.size
1644 1644 buffer = util.buffer
1645 1645
1646 1646 l = []
1647 1647 ladd = l.append
1648 1648
1649 1649 if not self._withsparseread:
1650 1650 slicedchunks = (revs,)
1651 1651 else:
1652 1652 slicedchunks = deltautil.slicechunk(
1653 1653 self, revs, targetsize=targetsize
1654 1654 )
1655 1655
1656 1656 for revschunk in slicedchunks:
1657 1657 firstrev = revschunk[0]
1658 1658 # Skip trailing revisions with empty diff
1659 1659 for lastrev in revschunk[::-1]:
1660 1660 if length(lastrev) != 0:
1661 1661 break
1662 1662
1663 1663 try:
1664 1664 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1665 1665 except OverflowError:
1666 1666 # issue4215 - we can't cache a run of chunks greater than
1667 1667 # 2G on Windows
1668 1668 return [self._chunk(rev, df=df) for rev in revschunk]
1669 1669
1670 1670 decomp = self.decompress
1671 1671 for rev in revschunk:
1672 1672 chunkstart = start(rev)
1673 1673 if inline:
1674 1674 chunkstart += (rev + 1) * iosize
1675 1675 chunklength = length(rev)
1676 1676 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1677 1677
1678 1678 return l
1679 1679
1680 1680 def _chunkclear(self):
1681 1681 """Clear the raw chunk cache."""
1682 1682 self._chunkcache = (0, b'')
1683 1683
1684 1684 def deltaparent(self, rev):
1685 1685 """return deltaparent of the given revision"""
1686 1686 base = self.index[rev][3]
1687 1687 if base == rev:
1688 1688 return nullrev
1689 1689 elif self._generaldelta:
1690 1690 return base
1691 1691 else:
1692 1692 return rev - 1
1693 1693
1694 1694 def issnapshot(self, rev):
1695 1695 """tells whether rev is a snapshot
1696 1696 """
1697 1697 if not self._sparserevlog:
1698 1698 return self.deltaparent(rev) == nullrev
1699 1699 elif util.safehasattr(self.index, b'issnapshot'):
1700 1700 # directly assign the method to cache the testing and access
1701 1701 self.issnapshot = self.index.issnapshot
1702 1702 return self.issnapshot(rev)
1703 1703 if rev == nullrev:
1704 1704 return True
1705 1705 entry = self.index[rev]
1706 1706 base = entry[3]
1707 1707 if base == rev:
1708 1708 return True
1709 1709 if base == nullrev:
1710 1710 return True
1711 1711 p1 = entry[5]
1712 1712 p2 = entry[6]
1713 1713 if base == p1 or base == p2:
1714 1714 return False
1715 1715 return self.issnapshot(base)
1716 1716
1717 1717 def snapshotdepth(self, rev):
1718 1718 """number of snapshot in the chain before this one"""
1719 1719 if not self.issnapshot(rev):
1720 1720 raise error.ProgrammingError(b'revision %d not a snapshot')
1721 1721 return len(self._deltachain(rev)[0]) - 1
1722 1722
1723 1723 def revdiff(self, rev1, rev2):
1724 1724 """return or calculate a delta between two revisions
1725 1725
1726 1726 The delta calculated is in binary form and is intended to be written to
1727 1727 revlog data directly. So this function needs raw revision data.
1728 1728 """
1729 1729 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1730 1730 return bytes(self._chunk(rev2))
1731 1731
1732 1732 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1733 1733
1734 1734 def _processflags(self, text, flags, operation, raw=False):
1735 1735 """deprecated entry point to access flag processors"""
1736 1736 msg = b'_processflag(...) use the specialized variant'
1737 1737 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1738 1738 if raw:
1739 1739 return text, flagutil.processflagsraw(self, text, flags)
1740 1740 elif operation == b'read':
1741 1741 return flagutil.processflagsread(self, text, flags)
1742 1742 else: # write operation
1743 1743 return flagutil.processflagswrite(self, text, flags)
1744 1744
1745 1745 def revision(self, nodeorrev, _df=None, raw=False):
1746 1746 """return an uncompressed revision of a given node or revision
1747 1747 number.
1748 1748
1749 1749 _df - an existing file handle to read from. (internal-only)
1750 1750 raw - an optional argument specifying if the revision data is to be
1751 1751 treated as raw data when applying flag transforms. 'raw' should be set
1752 1752 to True when generating changegroups or in debug commands.
1753 1753 """
1754 1754 if raw:
1755 1755 msg = (
1756 1756 b'revlog.revision(..., raw=True) is deprecated, '
1757 1757 b'use revlog.rawdata(...)'
1758 1758 )
1759 1759 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1760 1760 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1761 1761
1762 1762 def sidedata(self, nodeorrev, _df=None):
1763 1763 """a map of extra data related to the changeset but not part of the hash
1764 1764
1765 1765 This function currently return a dictionary. However, more advanced
1766 1766 mapping object will likely be used in the future for a more
1767 1767 efficient/lazy code.
1768 1768 """
1769 1769 return self._revisiondata(nodeorrev, _df)[1]
1770 1770
1771 1771 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1772 1772 # deal with <nodeorrev> argument type
1773 1773 if isinstance(nodeorrev, int):
1774 1774 rev = nodeorrev
1775 1775 node = self.node(rev)
1776 1776 else:
1777 1777 node = nodeorrev
1778 1778 rev = None
1779 1779
1780 1780 # fast path the special `nullid` rev
1781 1781 if node == nullid:
1782 1782 return b"", {}
1783 1783
1784 1784 # ``rawtext`` is the text as stored inside the revlog. Might be the
1785 1785 # revision or might need to be processed to retrieve the revision.
1786 1786 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1787 1787
1788 1788 if raw and validated:
1789 1789 # if we don't want to process the raw text and that raw
1790 1790 # text is cached, we can exit early.
1791 1791 return rawtext, {}
1792 1792 if rev is None:
1793 1793 rev = self.rev(node)
1794 1794 # the revlog's flag for this revision
1795 1795 # (usually alter its state or content)
1796 1796 flags = self.flags(rev)
1797 1797
1798 1798 if validated and flags == REVIDX_DEFAULT_FLAGS:
1799 1799 # no extra flags set, no flag processor runs, text = rawtext
1800 1800 return rawtext, {}
1801 1801
1802 1802 sidedata = {}
1803 1803 if raw:
1804 1804 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1805 1805 text = rawtext
1806 1806 else:
1807 1807 try:
1808 1808 r = flagutil.processflagsread(self, rawtext, flags)
1809 1809 except error.SidedataHashError as exc:
1810 1810 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1811 1811 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1812 1812 raise error.RevlogError(msg)
1813 1813 text, validatehash, sidedata = r
1814 1814 if validatehash:
1815 1815 self.checkhash(text, node, rev=rev)
1816 1816 if not validated:
1817 1817 self._revisioncache = (node, rev, rawtext)
1818 1818
1819 1819 return text, sidedata
1820 1820
1821 1821 def _rawtext(self, node, rev, _df=None):
1822 1822 """return the possibly unvalidated rawtext for a revision
1823 1823
1824 1824 returns (rev, rawtext, validated)
1825 1825 """
1826 1826
1827 1827 # revision in the cache (could be useful to apply delta)
1828 1828 cachedrev = None
1829 1829 # An intermediate text to apply deltas to
1830 1830 basetext = None
1831 1831
1832 1832 # Check if we have the entry in cache
1833 1833 # The cache entry looks like (node, rev, rawtext)
1834 1834 if self._revisioncache:
1835 1835 if self._revisioncache[0] == node:
1836 1836 return (rev, self._revisioncache[2], True)
1837 1837 cachedrev = self._revisioncache[1]
1838 1838
1839 1839 if rev is None:
1840 1840 rev = self.rev(node)
1841 1841
1842 1842 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1843 1843 if stopped:
1844 1844 basetext = self._revisioncache[2]
1845 1845
1846 1846 # drop cache to save memory, the caller is expected to
1847 1847 # update self._revisioncache after validating the text
1848 1848 self._revisioncache = None
1849 1849
1850 1850 targetsize = None
1851 1851 rawsize = self.index[rev][2]
1852 1852 if 0 <= rawsize:
1853 1853 targetsize = 4 * rawsize
1854 1854
1855 1855 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1856 1856 if basetext is None:
1857 1857 basetext = bytes(bins[0])
1858 1858 bins = bins[1:]
1859 1859
1860 1860 rawtext = mdiff.patches(basetext, bins)
1861 1861 del basetext # let us have a chance to free memory early
1862 1862 return (rev, rawtext, False)
1863 1863
1864 1864 def rawdata(self, nodeorrev, _df=None):
1865 1865 """return an uncompressed raw data of a given node or revision number.
1866 1866
1867 1867 _df - an existing file handle to read from. (internal-only)
1868 1868 """
1869 1869 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1870 1870
1871 1871 def hash(self, text, p1, p2):
1872 1872 """Compute a node hash.
1873 1873
1874 1874 Available as a function so that subclasses can replace the hash
1875 1875 as needed.
1876 1876 """
1877 1877 return storageutil.hashrevisionsha1(text, p1, p2)
1878 1878
1879 1879 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1880 1880 """Check node hash integrity.
1881 1881
1882 1882 Available as a function so that subclasses can extend hash mismatch
1883 1883 behaviors as needed.
1884 1884 """
1885 1885 try:
1886 1886 if p1 is None and p2 is None:
1887 1887 p1, p2 = self.parents(node)
1888 1888 if node != self.hash(text, p1, p2):
1889 1889 # Clear the revision cache on hash failure. The revision cache
1890 1890 # only stores the raw revision and clearing the cache does have
1891 1891 # the side-effect that we won't have a cache hit when the raw
1892 1892 # revision data is accessed. But this case should be rare and
1893 1893 # it is extra work to teach the cache about the hash
1894 1894 # verification state.
1895 1895 if self._revisioncache and self._revisioncache[0] == node:
1896 1896 self._revisioncache = None
1897 1897
1898 1898 revornode = rev
1899 1899 if revornode is None:
1900 1900 revornode = templatefilters.short(hex(node))
1901 1901 raise error.RevlogError(
1902 1902 _(b"integrity check failed on %s:%s")
1903 1903 % (self.indexfile, pycompat.bytestr(revornode))
1904 1904 )
1905 1905 except error.RevlogError:
1906 1906 if self._censorable and storageutil.iscensoredtext(text):
1907 1907 raise error.CensoredNodeError(self.indexfile, node, text)
1908 1908 raise
1909 1909
1910 1910 def _enforceinlinesize(self, tr, fp=None):
1911 1911 """Check if the revlog is too big for inline and convert if so.
1912 1912
1913 1913 This should be called after revisions are added to the revlog. If the
1914 1914 revlog has grown too large to be an inline revlog, it will convert it
1915 1915 to use multiple index and data files.
1916 1916 """
1917 1917 tiprev = len(self) - 1
1918 1918 if (
1919 1919 not self._inline
1920 1920 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1921 1921 ):
1922 1922 return
1923 1923
1924 1924 trinfo = tr.find(self.indexfile)
1925 1925 if trinfo is None:
1926 1926 raise error.RevlogError(
1927 1927 _(b"%s not found in the transaction") % self.indexfile
1928 1928 )
1929 1929
1930 1930 trindex = trinfo[2]
1931 1931 if trindex is not None:
1932 1932 dataoff = self.start(trindex)
1933 1933 else:
1934 1934 # revlog was stripped at start of transaction, use all leftover data
1935 1935 trindex = len(self) - 1
1936 1936 dataoff = self.end(tiprev)
1937 1937
1938 1938 tr.add(self.datafile, dataoff)
1939 1939
1940 1940 if fp:
1941 1941 fp.flush()
1942 1942 fp.close()
1943 1943 # We can't use the cached file handle after close(). So prevent
1944 1944 # its usage.
1945 1945 self._writinghandles = None
1946 1946
1947 1947 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1948 1948 for r in self:
1949 1949 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1950 1950
1951 1951 with self._indexfp(b'w') as fp:
1952 1952 self.version &= ~FLAG_INLINE_DATA
1953 1953 self._inline = False
1954 1954 io = self._io
1955 1955 for i in self:
1956 1956 e = io.packentry(self.index[i], self.node, self.version, i)
1957 1957 fp.write(e)
1958 1958
1959 1959 # the temp file replace the real index when we exit the context
1960 1960 # manager
1961 1961
1962 1962 tr.replace(self.indexfile, trindex * self._io.size)
1963 1963 self._chunkclear()
1964 1964
1965 1965 def _nodeduplicatecallback(self, transaction, node):
1966 1966 """called when trying to add a node already stored.
1967 1967 """
1968 1968
1969 1969 def addrevision(
1970 1970 self,
1971 1971 text,
1972 1972 transaction,
1973 1973 link,
1974 1974 p1,
1975 1975 p2,
1976 1976 cachedelta=None,
1977 1977 node=None,
1978 1978 flags=REVIDX_DEFAULT_FLAGS,
1979 1979 deltacomputer=None,
1980 1980 sidedata=None,
1981 1981 ):
1982 1982 """add a revision to the log
1983 1983
1984 1984 text - the revision data to add
1985 1985 transaction - the transaction object used for rollback
1986 1986 link - the linkrev data to add
1987 1987 p1, p2 - the parent nodeids of the revision
1988 1988 cachedelta - an optional precomputed delta
1989 1989 node - nodeid of revision; typically node is not specified, and it is
1990 1990 computed by default as hash(text, p1, p2), however subclasses might
1991 1991 use different hashing method (and override checkhash() in such case)
1992 1992 flags - the known flags to set on the revision
1993 1993 deltacomputer - an optional deltacomputer instance shared between
1994 1994 multiple calls
1995 1995 """
1996 1996 if link == nullrev:
1997 1997 raise error.RevlogError(
1998 1998 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1999 1999 )
2000 2000
2001 2001 if sidedata is None:
2002 2002 sidedata = {}
2003 2003 flags = flags & ~REVIDX_SIDEDATA
2004 2004 elif not self.hassidedata:
2005 2005 raise error.ProgrammingError(
2006 2006 _(b"trying to add sidedata to a revlog who don't support them")
2007 2007 )
2008 2008 else:
2009 2009 flags |= REVIDX_SIDEDATA
2010 2010
2011 2011 if flags:
2012 2012 node = node or self.hash(text, p1, p2)
2013 2013
2014 2014 rawtext, validatehash = flagutil.processflagswrite(
2015 2015 self, text, flags, sidedata=sidedata
2016 2016 )
2017 2017
2018 2018 # If the flag processor modifies the revision data, ignore any provided
2019 2019 # cachedelta.
2020 2020 if rawtext != text:
2021 2021 cachedelta = None
2022 2022
2023 2023 if len(rawtext) > _maxentrysize:
2024 2024 raise error.RevlogError(
2025 2025 _(
2026 2026 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2027 2027 )
2028 2028 % (self.indexfile, len(rawtext))
2029 2029 )
2030 2030
2031 2031 node = node or self.hash(rawtext, p1, p2)
2032 2032 if self.index.has_node(node):
2033 2033 return node
2034 2034
2035 2035 if validatehash:
2036 2036 self.checkhash(rawtext, node, p1=p1, p2=p2)
2037 2037
2038 2038 return self.addrawrevision(
2039 2039 rawtext,
2040 2040 transaction,
2041 2041 link,
2042 2042 p1,
2043 2043 p2,
2044 2044 node,
2045 2045 flags,
2046 2046 cachedelta=cachedelta,
2047 2047 deltacomputer=deltacomputer,
2048 2048 )
2049 2049
2050 2050 def addrawrevision(
2051 2051 self,
2052 2052 rawtext,
2053 2053 transaction,
2054 2054 link,
2055 2055 p1,
2056 2056 p2,
2057 2057 node,
2058 2058 flags,
2059 2059 cachedelta=None,
2060 2060 deltacomputer=None,
2061 2061 ):
2062 2062 """add a raw revision with known flags, node and parents
2063 2063 useful when reusing a revision not stored in this revlog (ex: received
2064 2064 over wire, or read from an external bundle).
2065 2065 """
2066 2066 dfh = None
2067 2067 if not self._inline:
2068 2068 dfh = self._datafp(b"a+")
2069 2069 ifh = self._indexfp(b"a+")
2070 2070 try:
2071 2071 return self._addrevision(
2072 2072 node,
2073 2073 rawtext,
2074 2074 transaction,
2075 2075 link,
2076 2076 p1,
2077 2077 p2,
2078 2078 flags,
2079 2079 cachedelta,
2080 2080 ifh,
2081 2081 dfh,
2082 2082 deltacomputer=deltacomputer,
2083 2083 )
2084 2084 finally:
2085 2085 if dfh:
2086 2086 dfh.close()
2087 2087 ifh.close()
2088 2088
2089 2089 def compress(self, data):
2090 2090 """Generate a possibly-compressed representation of data."""
2091 2091 if not data:
2092 2092 return b'', data
2093 2093
2094 2094 compressed = self._compressor.compress(data)
2095 2095
2096 2096 if compressed:
2097 2097 # The revlog compressor added the header in the returned data.
2098 2098 return b'', compressed
2099 2099
2100 2100 if data[0:1] == b'\0':
2101 2101 return b'', data
2102 2102 return b'u', data
2103 2103
2104 2104 def decompress(self, data):
2105 2105 """Decompress a revlog chunk.
2106 2106
2107 2107 The chunk is expected to begin with a header identifying the
2108 2108 format type so it can be routed to an appropriate decompressor.
2109 2109 """
2110 2110 if not data:
2111 2111 return data
2112 2112
2113 2113 # Revlogs are read much more frequently than they are written and many
2114 2114 # chunks only take microseconds to decompress, so performance is
2115 2115 # important here.
2116 2116 #
2117 2117 # We can make a few assumptions about revlogs:
2118 2118 #
2119 2119 # 1) the majority of chunks will be compressed (as opposed to inline
2120 2120 # raw data).
2121 2121 # 2) decompressing *any* data will likely by at least 10x slower than
2122 2122 # returning raw inline data.
2123 2123 # 3) we want to prioritize common and officially supported compression
2124 2124 # engines
2125 2125 #
2126 2126 # It follows that we want to optimize for "decompress compressed data
2127 2127 # when encoded with common and officially supported compression engines"
2128 2128 # case over "raw data" and "data encoded by less common or non-official
2129 2129 # compression engines." That is why we have the inline lookup first
2130 2130 # followed by the compengines lookup.
2131 2131 #
2132 2132 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2133 2133 # compressed chunks. And this matters for changelog and manifest reads.
2134 2134 t = data[0:1]
2135 2135
2136 2136 if t == b'x':
2137 2137 try:
2138 2138 return _zlibdecompress(data)
2139 2139 except zlib.error as e:
2140 2140 raise error.RevlogError(
2141 2141 _(b'revlog decompress error: %s')
2142 2142 % stringutil.forcebytestr(e)
2143 2143 )
2144 2144 # '\0' is more common than 'u' so it goes first.
2145 2145 elif t == b'\0':
2146 2146 return data
2147 2147 elif t == b'u':
2148 2148 return util.buffer(data, 1)
2149 2149
2150 2150 try:
2151 2151 compressor = self._decompressors[t]
2152 2152 except KeyError:
2153 2153 try:
2154 2154 engine = util.compengines.forrevlogheader(t)
2155 2155 compressor = engine.revlogcompressor(self._compengineopts)
2156 2156 self._decompressors[t] = compressor
2157 2157 except KeyError:
2158 2158 raise error.RevlogError(_(b'unknown compression type %r') % t)
2159 2159
2160 2160 return compressor.decompress(data)
2161 2161
2162 2162 def _addrevision(
2163 2163 self,
2164 2164 node,
2165 2165 rawtext,
2166 2166 transaction,
2167 2167 link,
2168 2168 p1,
2169 2169 p2,
2170 2170 flags,
2171 2171 cachedelta,
2172 2172 ifh,
2173 2173 dfh,
2174 2174 alwayscache=False,
2175 2175 deltacomputer=None,
2176 2176 ):
2177 2177 """internal function to add revisions to the log
2178 2178
2179 2179 see addrevision for argument descriptions.
2180 2180
2181 2181 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2182 2182
2183 2183 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2184 2184 be used.
2185 2185
2186 2186 invariants:
2187 2187 - rawtext is optional (can be None); if not set, cachedelta must be set.
2188 2188 if both are set, they must correspond to each other.
2189 2189 """
2190 2190 if node == nullid:
2191 2191 raise error.RevlogError(
2192 2192 _(b"%s: attempt to add null revision") % self.indexfile
2193 2193 )
2194 2194 if node == wdirid or node in wdirfilenodeids:
2195 2195 raise error.RevlogError(
2196 2196 _(b"%s: attempt to add wdir revision") % self.indexfile
2197 2197 )
2198 2198
2199 2199 if self._inline:
2200 2200 fh = ifh
2201 2201 else:
2202 2202 fh = dfh
2203 2203
2204 2204 btext = [rawtext]
2205 2205
2206 2206 curr = len(self)
2207 2207 prev = curr - 1
2208 2208 offset = self.end(prev)
2209 2209 p1r, p2r = self.rev(p1), self.rev(p2)
2210 2210
2211 2211 # full versions are inserted when the needed deltas
2212 2212 # become comparable to the uncompressed text
2213 2213 if rawtext is None:
2214 2214 # need rawtext size, before changed by flag processors, which is
2215 2215 # the non-raw size. use revlog explicitly to avoid filelog's extra
2216 2216 # logic that might remove metadata size.
2217 2217 textlen = mdiff.patchedsize(
2218 2218 revlog.size(self, cachedelta[0]), cachedelta[1]
2219 2219 )
2220 2220 else:
2221 2221 textlen = len(rawtext)
2222 2222
2223 2223 if deltacomputer is None:
2224 2224 deltacomputer = deltautil.deltacomputer(self)
2225 2225
2226 2226 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2227 2227
2228 2228 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2229 2229
2230 2230 e = (
2231 2231 offset_type(offset, flags),
2232 2232 deltainfo.deltalen,
2233 2233 textlen,
2234 2234 deltainfo.base,
2235 2235 link,
2236 2236 p1r,
2237 2237 p2r,
2238 2238 node,
2239 2239 )
2240 2240 self.index.append(e)
2241 2241
2242 2242 entry = self._io.packentry(e, self.node, self.version, curr)
2243 2243 self._writeentry(
2244 2244 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2245 2245 )
2246 2246
2247 2247 rawtext = btext[0]
2248 2248
2249 2249 if alwayscache and rawtext is None:
2250 2250 rawtext = deltacomputer.buildtext(revinfo, fh)
2251 2251
2252 2252 if type(rawtext) == bytes: # only accept immutable objects
2253 2253 self._revisioncache = (node, curr, rawtext)
2254 2254 self._chainbasecache[curr] = deltainfo.chainbase
2255 2255 return node
2256 2256
2257 2257 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2258 2258 # Files opened in a+ mode have inconsistent behavior on various
2259 2259 # platforms. Windows requires that a file positioning call be made
2260 2260 # when the file handle transitions between reads and writes. See
2261 2261 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2262 2262 # platforms, Python or the platform itself can be buggy. Some versions
2263 2263 # of Solaris have been observed to not append at the end of the file
2264 2264 # if the file was seeked to before the end. See issue4943 for more.
2265 2265 #
2266 2266 # We work around this issue by inserting a seek() before writing.
2267 2267 # Note: This is likely not necessary on Python 3. However, because
2268 2268 # the file handle is reused for reads and may be seeked there, we need
2269 2269 # to be careful before changing this.
2270 2270 ifh.seek(0, os.SEEK_END)
2271 2271 if dfh:
2272 2272 dfh.seek(0, os.SEEK_END)
2273 2273
2274 2274 curr = len(self) - 1
2275 2275 if not self._inline:
2276 2276 transaction.add(self.datafile, offset)
2277 2277 transaction.add(self.indexfile, curr * len(entry))
2278 2278 if data[0]:
2279 2279 dfh.write(data[0])
2280 2280 dfh.write(data[1])
2281 2281 ifh.write(entry)
2282 2282 else:
2283 2283 offset += curr * self._io.size
2284 2284 transaction.add(self.indexfile, offset, curr)
2285 2285 ifh.write(entry)
2286 2286 ifh.write(data[0])
2287 2287 ifh.write(data[1])
2288 2288 self._enforceinlinesize(transaction, ifh)
2289 2289
2290 2290 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2291 2291 """
2292 2292 add a delta group
2293 2293
2294 2294 given a set of deltas, add them to the revision log. the
2295 2295 first delta is against its parent, which should be in our
2296 2296 log, the rest are against the previous delta.
2297 2297
2298 2298 If ``addrevisioncb`` is defined, it will be called with arguments of
2299 2299 this revlog and the node that was added.
2300 2300 """
2301 2301
2302 2302 if self._writinghandles:
2303 2303 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2304 2304
2305 2305 nodes = []
2306 2306
2307 2307 r = len(self)
2308 2308 end = 0
2309 2309 if r:
2310 2310 end = self.end(r - 1)
2311 2311 ifh = self._indexfp(b"a+")
2312 2312 isize = r * self._io.size
2313 2313 if self._inline:
2314 2314 transaction.add(self.indexfile, end + isize, r)
2315 2315 dfh = None
2316 2316 else:
2317 2317 transaction.add(self.indexfile, isize, r)
2318 2318 transaction.add(self.datafile, end)
2319 2319 dfh = self._datafp(b"a+")
2320 2320
2321 2321 def flush():
2322 2322 if dfh:
2323 2323 dfh.flush()
2324 2324 ifh.flush()
2325 2325
2326 2326 self._writinghandles = (ifh, dfh)
2327 2327
2328 2328 try:
2329 2329 deltacomputer = deltautil.deltacomputer(self)
2330 2330 # loop through our set of deltas
2331 2331 for data in deltas:
2332 2332 node, p1, p2, linknode, deltabase, delta, flags = data
2333 2333 link = linkmapper(linknode)
2334 2334 flags = flags or REVIDX_DEFAULT_FLAGS
2335 2335
2336 2336 nodes.append(node)
2337 2337
2338 2338 if self.index.has_node(node):
2339 2339 self._nodeduplicatecallback(transaction, node)
2340 2340 # this can happen if two branches make the same change
2341 2341 continue
2342 2342
2343 2343 for p in (p1, p2):
2344 2344 if not self.index.has_node(p):
2345 2345 raise error.LookupError(
2346 2346 p, self.indexfile, _(b'unknown parent')
2347 2347 )
2348 2348
2349 2349 if not self.index.has_node(deltabase):
2350 2350 raise error.LookupError(
2351 2351 deltabase, self.indexfile, _(b'unknown delta base')
2352 2352 )
2353 2353
2354 2354 baserev = self.rev(deltabase)
2355 2355
2356 2356 if baserev != nullrev and self.iscensored(baserev):
2357 2357 # if base is censored, delta must be full replacement in a
2358 2358 # single patch operation
2359 2359 hlen = struct.calcsize(b">lll")
2360 2360 oldlen = self.rawsize(baserev)
2361 2361 newlen = len(delta) - hlen
2362 2362 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2363 2363 raise error.CensoredBaseError(
2364 2364 self.indexfile, self.node(baserev)
2365 2365 )
2366 2366
2367 2367 if not flags and self._peek_iscensored(baserev, delta, flush):
2368 2368 flags |= REVIDX_ISCENSORED
2369 2369
2370 2370 # We assume consumers of addrevisioncb will want to retrieve
2371 2371 # the added revision, which will require a call to
2372 2372 # revision(). revision() will fast path if there is a cache
2373 2373 # hit. So, we tell _addrevision() to always cache in this case.
2374 2374 # We're only using addgroup() in the context of changegroup
2375 2375 # generation so the revision data can always be handled as raw
2376 2376 # by the flagprocessor.
2377 2377 self._addrevision(
2378 2378 node,
2379 2379 None,
2380 2380 transaction,
2381 2381 link,
2382 2382 p1,
2383 2383 p2,
2384 2384 flags,
2385 2385 (baserev, delta),
2386 2386 ifh,
2387 2387 dfh,
2388 2388 alwayscache=bool(addrevisioncb),
2389 2389 deltacomputer=deltacomputer,
2390 2390 )
2391 2391
2392 2392 if addrevisioncb:
2393 2393 addrevisioncb(self, node)
2394 2394
2395 2395 if not dfh and not self._inline:
2396 2396 # addrevision switched from inline to conventional
2397 2397 # reopen the index
2398 2398 ifh.close()
2399 2399 dfh = self._datafp(b"a+")
2400 2400 ifh = self._indexfp(b"a+")
2401 2401 self._writinghandles = (ifh, dfh)
2402 2402 finally:
2403 2403 self._writinghandles = None
2404 2404
2405 2405 if dfh:
2406 2406 dfh.close()
2407 2407 ifh.close()
2408 2408
2409 2409 return nodes
2410 2410
2411 2411 def iscensored(self, rev):
2412 2412 """Check if a file revision is censored."""
2413 2413 if not self._censorable:
2414 2414 return False
2415 2415
2416 2416 return self.flags(rev) & REVIDX_ISCENSORED
2417 2417
2418 2418 def _peek_iscensored(self, baserev, delta, flush):
2419 2419 """Quickly check if a delta produces a censored revision."""
2420 2420 if not self._censorable:
2421 2421 return False
2422 2422
2423 2423 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2424 2424
2425 2425 def getstrippoint(self, minlink):
2426 2426 """find the minimum rev that must be stripped to strip the linkrev
2427 2427
2428 2428 Returns a tuple containing the minimum rev and a set of all revs that
2429 2429 have linkrevs that will be broken by this strip.
2430 2430 """
2431 2431 return storageutil.resolvestripinfo(
2432 2432 minlink,
2433 2433 len(self) - 1,
2434 2434 self.headrevs(),
2435 2435 self.linkrev,
2436 2436 self.parentrevs,
2437 2437 )
2438 2438
2439 2439 def strip(self, minlink, transaction):
2440 2440 """truncate the revlog on the first revision with a linkrev >= minlink
2441 2441
2442 2442 This function is called when we're stripping revision minlink and
2443 2443 its descendants from the repository.
2444 2444
2445 2445 We have to remove all revisions with linkrev >= minlink, because
2446 2446 the equivalent changelog revisions will be renumbered after the
2447 2447 strip.
2448 2448
2449 2449 So we truncate the revlog on the first of these revisions, and
2450 2450 trust that the caller has saved the revisions that shouldn't be
2451 2451 removed and that it'll re-add them after this truncation.
2452 2452 """
2453 2453 if len(self) == 0:
2454 2454 return
2455 2455
2456 2456 rev, _ = self.getstrippoint(minlink)
2457 2457 if rev == len(self):
2458 2458 return
2459 2459
2460 2460 # first truncate the files on disk
2461 2461 end = self.start(rev)
2462 2462 if not self._inline:
2463 2463 transaction.add(self.datafile, end)
2464 2464 end = rev * self._io.size
2465 2465 else:
2466 2466 end += rev * self._io.size
2467 2467
2468 2468 transaction.add(self.indexfile, end)
2469 2469
2470 2470 # then reset internal state in memory to forget those revisions
2471 2471 self._revisioncache = None
2472 2472 self._chaininfocache = {}
2473 2473 self._chunkclear()
2474 2474
2475 2475 del self.index[rev:-1]
2476 2476
2477 2477 def checksize(self):
2478 2478 """Check size of index and data files
2479 2479
2480 2480 return a (dd, di) tuple.
2481 2481 - dd: extra bytes for the "data" file
2482 2482 - di: extra bytes for the "index" file
2483 2483
2484 2484 A healthy revlog will return (0, 0).
2485 2485 """
2486 2486 expected = 0
2487 2487 if len(self):
2488 2488 expected = max(0, self.end(len(self) - 1))
2489 2489
2490 2490 try:
2491 2491 with self._datafp() as f:
2492 2492 f.seek(0, io.SEEK_END)
2493 2493 actual = f.tell()
2494 2494 dd = actual - expected
2495 2495 except IOError as inst:
2496 2496 if inst.errno != errno.ENOENT:
2497 2497 raise
2498 2498 dd = 0
2499 2499
2500 2500 try:
2501 2501 f = self.opener(self.indexfile)
2502 2502 f.seek(0, io.SEEK_END)
2503 2503 actual = f.tell()
2504 2504 f.close()
2505 2505 s = self._io.size
2506 2506 i = max(0, actual // s)
2507 2507 di = actual - (i * s)
2508 2508 if self._inline:
2509 2509 databytes = 0
2510 2510 for r in self:
2511 2511 databytes += max(0, self.length(r))
2512 2512 dd = 0
2513 2513 di = actual - len(self) * s - databytes
2514 2514 except IOError as inst:
2515 2515 if inst.errno != errno.ENOENT:
2516 2516 raise
2517 2517 di = 0
2518 2518
2519 2519 return (dd, di)
2520 2520
2521 2521 def files(self):
2522 2522 res = [self.indexfile]
2523 2523 if not self._inline:
2524 2524 res.append(self.datafile)
2525 2525 return res
2526 2526
2527 2527 def emitrevisions(
2528 2528 self,
2529 2529 nodes,
2530 2530 nodesorder=None,
2531 2531 revisiondata=False,
2532 2532 assumehaveparentrevisions=False,
2533 2533 deltamode=repository.CG_DELTAMODE_STD,
2534 2534 ):
2535 2535 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2536 2536 raise error.ProgrammingError(
2537 2537 b'unhandled value for nodesorder: %s' % nodesorder
2538 2538 )
2539 2539
2540 2540 if nodesorder is None and not self._generaldelta:
2541 2541 nodesorder = b'storage'
2542 2542
2543 2543 if (
2544 2544 not self._storedeltachains
2545 2545 and deltamode != repository.CG_DELTAMODE_PREV
2546 2546 ):
2547 2547 deltamode = repository.CG_DELTAMODE_FULL
2548 2548
2549 2549 return storageutil.emitrevisions(
2550 2550 self,
2551 2551 nodes,
2552 2552 nodesorder,
2553 2553 revlogrevisiondelta,
2554 2554 deltaparentfn=self.deltaparent,
2555 2555 candeltafn=self.candelta,
2556 2556 rawsizefn=self.rawsize,
2557 2557 revdifffn=self.revdiff,
2558 2558 flagsfn=self.flags,
2559 2559 deltamode=deltamode,
2560 2560 revisiondata=revisiondata,
2561 2561 assumehaveparentrevisions=assumehaveparentrevisions,
2562 2562 )
2563 2563
2564 2564 DELTAREUSEALWAYS = b'always'
2565 2565 DELTAREUSESAMEREVS = b'samerevs'
2566 2566 DELTAREUSENEVER = b'never'
2567 2567
2568 2568 DELTAREUSEFULLADD = b'fulladd'
2569 2569
2570 2570 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2571 2571
2572 2572 def clone(
2573 2573 self,
2574 2574 tr,
2575 2575 destrevlog,
2576 2576 addrevisioncb=None,
2577 2577 deltareuse=DELTAREUSESAMEREVS,
2578 2578 forcedeltabothparents=None,
2579 2579 sidedatacompanion=None,
2580 2580 ):
2581 2581 """Copy this revlog to another, possibly with format changes.
2582 2582
2583 2583 The destination revlog will contain the same revisions and nodes.
2584 2584 However, it may not be bit-for-bit identical due to e.g. delta encoding
2585 2585 differences.
2586 2586
2587 2587 The ``deltareuse`` argument control how deltas from the existing revlog
2588 2588 are preserved in the destination revlog. The argument can have the
2589 2589 following values:
2590 2590
2591 2591 DELTAREUSEALWAYS
2592 2592 Deltas will always be reused (if possible), even if the destination
2593 2593 revlog would not select the same revisions for the delta. This is the
2594 2594 fastest mode of operation.
2595 2595 DELTAREUSESAMEREVS
2596 2596 Deltas will be reused if the destination revlog would pick the same
2597 2597 revisions for the delta. This mode strikes a balance between speed
2598 2598 and optimization.
2599 2599 DELTAREUSENEVER
2600 2600 Deltas will never be reused. This is the slowest mode of execution.
2601 2601 This mode can be used to recompute deltas (e.g. if the diff/delta
2602 2602 algorithm changes).
2603 2603 DELTAREUSEFULLADD
2604 2604 Revision will be re-added as if their were new content. This is
2605 2605 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2606 2606 eg: large file detection and handling.
2607 2607
2608 2608 Delta computation can be slow, so the choice of delta reuse policy can
2609 2609 significantly affect run time.
2610 2610
2611 2611 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2612 2612 two extremes. Deltas will be reused if they are appropriate. But if the
2613 2613 delta could choose a better revision, it will do so. This means if you
2614 2614 are converting a non-generaldelta revlog to a generaldelta revlog,
2615 2615 deltas will be recomputed if the delta's parent isn't a parent of the
2616 2616 revision.
2617 2617
2618 2618 In addition to the delta policy, the ``forcedeltabothparents``
2619 2619 argument controls whether to force compute deltas against both parents
2620 2620 for merges. By default, the current default is used.
2621 2621
2622 2622 If not None, the `sidedatacompanion` is callable that accept two
2623 2623 arguments:
2624 2624
2625 2625 (srcrevlog, rev)
2626 2626
2627 2627 and return a triplet that control changes to sidedata content from the
2628 2628 old revision to the new clone result:
2629 2629
2630 2630 (dropall, filterout, update)
2631 2631
2632 2632 * if `dropall` is True, all sidedata should be dropped
2633 2633 * `filterout` is a set of sidedata keys that should be dropped
2634 2634 * `update` is a mapping of additionnal/new key -> value
2635 2635 """
2636 2636 if deltareuse not in self.DELTAREUSEALL:
2637 2637 raise ValueError(
2638 2638 _(b'value for deltareuse invalid: %s') % deltareuse
2639 2639 )
2640 2640
2641 2641 if len(destrevlog):
2642 2642 raise ValueError(_(b'destination revlog is not empty'))
2643 2643
2644 2644 if getattr(self, 'filteredrevs', None):
2645 2645 raise ValueError(_(b'source revlog has filtered revisions'))
2646 2646 if getattr(destrevlog, 'filteredrevs', None):
2647 2647 raise ValueError(_(b'destination revlog has filtered revisions'))
2648 2648
2649 2649 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2650 2650 # if possible.
2651 2651 oldlazydelta = destrevlog._lazydelta
2652 2652 oldlazydeltabase = destrevlog._lazydeltabase
2653 2653 oldamd = destrevlog._deltabothparents
2654 2654
2655 2655 try:
2656 2656 if deltareuse == self.DELTAREUSEALWAYS:
2657 2657 destrevlog._lazydeltabase = True
2658 2658 destrevlog._lazydelta = True
2659 2659 elif deltareuse == self.DELTAREUSESAMEREVS:
2660 2660 destrevlog._lazydeltabase = False
2661 2661 destrevlog._lazydelta = True
2662 2662 elif deltareuse == self.DELTAREUSENEVER:
2663 2663 destrevlog._lazydeltabase = False
2664 2664 destrevlog._lazydelta = False
2665 2665
2666 2666 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2667 2667
2668 2668 self._clone(
2669 2669 tr,
2670 2670 destrevlog,
2671 2671 addrevisioncb,
2672 2672 deltareuse,
2673 2673 forcedeltabothparents,
2674 2674 sidedatacompanion,
2675 2675 )
2676 2676
2677 2677 finally:
2678 2678 destrevlog._lazydelta = oldlazydelta
2679 2679 destrevlog._lazydeltabase = oldlazydeltabase
2680 2680 destrevlog._deltabothparents = oldamd
2681 2681
2682 2682 def _clone(
2683 2683 self,
2684 2684 tr,
2685 2685 destrevlog,
2686 2686 addrevisioncb,
2687 2687 deltareuse,
2688 2688 forcedeltabothparents,
2689 2689 sidedatacompanion,
2690 2690 ):
2691 2691 """perform the core duty of `revlog.clone` after parameter processing"""
2692 2692 deltacomputer = deltautil.deltacomputer(destrevlog)
2693 2693 index = self.index
2694 2694 for rev in self:
2695 2695 entry = index[rev]
2696 2696
2697 2697 # Some classes override linkrev to take filtered revs into
2698 2698 # account. Use raw entry from index.
2699 2699 flags = entry[0] & 0xFFFF
2700 2700 linkrev = entry[4]
2701 2701 p1 = index[entry[5]][7]
2702 2702 p2 = index[entry[6]][7]
2703 2703 node = entry[7]
2704 2704
2705 2705 sidedataactions = (False, [], {})
2706 2706 if sidedatacompanion is not None:
2707 2707 sidedataactions = sidedatacompanion(self, rev)
2708 2708
2709 2709 # (Possibly) reuse the delta from the revlog if allowed and
2710 2710 # the revlog chunk is a delta.
2711 2711 cachedelta = None
2712 2712 rawtext = None
2713 2713 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2714 2714 dropall, filterout, update = sidedataactions
2715 2715 text, sidedata = self._revisiondata(rev)
2716 2716 if dropall:
2717 2717 sidedata = {}
2718 2718 for key in filterout:
2719 2719 sidedata.pop(key, None)
2720 2720 sidedata.update(update)
2721 2721 if not sidedata:
2722 2722 sidedata = None
2723 2723 destrevlog.addrevision(
2724 2724 text,
2725 2725 tr,
2726 2726 linkrev,
2727 2727 p1,
2728 2728 p2,
2729 2729 cachedelta=cachedelta,
2730 2730 node=node,
2731 2731 flags=flags,
2732 2732 deltacomputer=deltacomputer,
2733 2733 sidedata=sidedata,
2734 2734 )
2735 2735 else:
2736 2736 if destrevlog._lazydelta:
2737 2737 dp = self.deltaparent(rev)
2738 2738 if dp != nullrev:
2739 2739 cachedelta = (dp, bytes(self._chunk(rev)))
2740 2740
2741 2741 if not cachedelta:
2742 2742 rawtext = self.rawdata(rev)
2743 2743
2744 2744 ifh = destrevlog.opener(
2745 2745 destrevlog.indexfile, b'a+', checkambig=False
2746 2746 )
2747 2747 dfh = None
2748 2748 if not destrevlog._inline:
2749 2749 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2750 2750 try:
2751 2751 destrevlog._addrevision(
2752 2752 node,
2753 2753 rawtext,
2754 2754 tr,
2755 2755 linkrev,
2756 2756 p1,
2757 2757 p2,
2758 2758 flags,
2759 2759 cachedelta,
2760 2760 ifh,
2761 2761 dfh,
2762 2762 deltacomputer=deltacomputer,
2763 2763 )
2764 2764 finally:
2765 2765 if dfh:
2766 2766 dfh.close()
2767 2767 ifh.close()
2768 2768
2769 2769 if addrevisioncb:
2770 2770 addrevisioncb(self, rev, node)
2771 2771
2772 2772 def censorrevision(self, tr, censornode, tombstone=b''):
2773 2773 if (self.version & 0xFFFF) == REVLOGV0:
2774 2774 raise error.RevlogError(
2775 2775 _(b'cannot censor with version %d revlogs') % self.version
2776 2776 )
2777 2777
2778 2778 censorrev = self.rev(censornode)
2779 2779 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2780 2780
2781 2781 if len(tombstone) > self.rawsize(censorrev):
2782 2782 raise error.Abort(
2783 2783 _(b'censor tombstone must be no longer than censored data')
2784 2784 )
2785 2785
2786 2786 # Rewriting the revlog in place is hard. Our strategy for censoring is
2787 2787 # to create a new revlog, copy all revisions to it, then replace the
2788 2788 # revlogs on transaction close.
2789 2789
2790 2790 newindexfile = self.indexfile + b'.tmpcensored'
2791 2791 newdatafile = self.datafile + b'.tmpcensored'
2792 2792
2793 2793 # This is a bit dangerous. We could easily have a mismatch of state.
2794 2794 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2795 2795 newrl.version = self.version
2796 2796 newrl._generaldelta = self._generaldelta
2797 2797 newrl._io = self._io
2798 2798
2799 2799 for rev in self.revs():
2800 2800 node = self.node(rev)
2801 2801 p1, p2 = self.parents(node)
2802 2802
2803 2803 if rev == censorrev:
2804 2804 newrl.addrawrevision(
2805 2805 tombstone,
2806 2806 tr,
2807 2807 self.linkrev(censorrev),
2808 2808 p1,
2809 2809 p2,
2810 2810 censornode,
2811 2811 REVIDX_ISCENSORED,
2812 2812 )
2813 2813
2814 2814 if newrl.deltaparent(rev) != nullrev:
2815 2815 raise error.Abort(
2816 2816 _(
2817 2817 b'censored revision stored as delta; '
2818 2818 b'cannot censor'
2819 2819 ),
2820 2820 hint=_(
2821 2821 b'censoring of revlogs is not '
2822 2822 b'fully implemented; please report '
2823 2823 b'this bug'
2824 2824 ),
2825 2825 )
2826 2826 continue
2827 2827
2828 2828 if self.iscensored(rev):
2829 2829 if self.deltaparent(rev) != nullrev:
2830 2830 raise error.Abort(
2831 2831 _(
2832 2832 b'cannot censor due to censored '
2833 2833 b'revision having delta stored'
2834 2834 )
2835 2835 )
2836 2836 rawtext = self._chunk(rev)
2837 2837 else:
2838 2838 rawtext = self.rawdata(rev)
2839 2839
2840 2840 newrl.addrawrevision(
2841 2841 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2842 2842 )
2843 2843
2844 2844 tr.addbackup(self.indexfile, location=b'store')
2845 2845 if not self._inline:
2846 2846 tr.addbackup(self.datafile, location=b'store')
2847 2847
2848 2848 self.opener.rename(newrl.indexfile, self.indexfile)
2849 2849 if not self._inline:
2850 2850 self.opener.rename(newrl.datafile, self.datafile)
2851 2851
2852 2852 self.clearcaches()
2853 2853 self._loadindex()
2854 2854
2855 2855 def verifyintegrity(self, state):
2856 2856 """Verifies the integrity of the revlog.
2857 2857
2858 2858 Yields ``revlogproblem`` instances describing problems that are
2859 2859 found.
2860 2860 """
2861 2861 dd, di = self.checksize()
2862 2862 if dd:
2863 2863 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2864 2864 if di:
2865 2865 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2866 2866
2867 2867 version = self.version & 0xFFFF
2868 2868
2869 2869 # The verifier tells us what version revlog we should be.
2870 2870 if version != state[b'expectedversion']:
2871 2871 yield revlogproblem(
2872 2872 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2873 2873 % (self.indexfile, version, state[b'expectedversion'])
2874 2874 )
2875 2875
2876 2876 state[b'skipread'] = set()
2877 2877
2878 2878 for rev in self:
2879 2879 node = self.node(rev)
2880 2880
2881 2881 # Verify contents. 4 cases to care about:
2882 2882 #
2883 2883 # common: the most common case
2884 2884 # rename: with a rename
2885 2885 # meta: file content starts with b'\1\n', the metadata
2886 2886 # header defined in filelog.py, but without a rename
2887 2887 # ext: content stored externally
2888 2888 #
2889 2889 # More formally, their differences are shown below:
2890 2890 #
2891 2891 # | common | rename | meta | ext
2892 2892 # -------------------------------------------------------
2893 2893 # flags() | 0 | 0 | 0 | not 0
2894 2894 # renamed() | False | True | False | ?
2895 2895 # rawtext[0:2]=='\1\n'| False | True | True | ?
2896 2896 #
2897 2897 # "rawtext" means the raw text stored in revlog data, which
2898 2898 # could be retrieved by "rawdata(rev)". "text"
2899 2899 # mentioned below is "revision(rev)".
2900 2900 #
2901 2901 # There are 3 different lengths stored physically:
2902 2902 # 1. L1: rawsize, stored in revlog index
2903 2903 # 2. L2: len(rawtext), stored in revlog data
2904 2904 # 3. L3: len(text), stored in revlog data if flags==0, or
2905 2905 # possibly somewhere else if flags!=0
2906 2906 #
2907 2907 # L1 should be equal to L2. L3 could be different from them.
2908 2908 # "text" may or may not affect commit hash depending on flag
2909 2909 # processors (see flagutil.addflagprocessor).
2910 2910 #
2911 2911 # | common | rename | meta | ext
2912 2912 # -------------------------------------------------
2913 2913 # rawsize() | L1 | L1 | L1 | L1
2914 2914 # size() | L1 | L2-LM | L1(*) | L1 (?)
2915 2915 # len(rawtext) | L2 | L2 | L2 | L2
2916 2916 # len(text) | L2 | L2 | L2 | L3
2917 2917 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2918 2918 #
2919 2919 # LM: length of metadata, depending on rawtext
2920 2920 # (*): not ideal, see comment in filelog.size
2921 2921 # (?): could be "- len(meta)" if the resolved content has
2922 2922 # rename metadata
2923 2923 #
2924 2924 # Checks needed to be done:
2925 2925 # 1. length check: L1 == L2, in all cases.
2926 2926 # 2. hash check: depending on flag processor, we may need to
2927 2927 # use either "text" (external), or "rawtext" (in revlog).
2928 2928
2929 2929 try:
2930 2930 skipflags = state.get(b'skipflags', 0)
2931 2931 if skipflags:
2932 2932 skipflags &= self.flags(rev)
2933 2933
2934 2934 _verify_revision(self, skipflags, state, node)
2935 2935
2936 2936 l1 = self.rawsize(rev)
2937 2937 l2 = len(self.rawdata(node))
2938 2938
2939 2939 if l1 != l2:
2940 2940 yield revlogproblem(
2941 2941 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2942 2942 node=node,
2943 2943 )
2944 2944
2945 2945 except error.CensoredNodeError:
2946 2946 if state[b'erroroncensored']:
2947 2947 yield revlogproblem(
2948 2948 error=_(b'censored file data'), node=node
2949 2949 )
2950 2950 state[b'skipread'].add(node)
2951 2951 except Exception as e:
2952 2952 yield revlogproblem(
2953 2953 error=_(b'unpacking %s: %s')
2954 2954 % (short(node), stringutil.forcebytestr(e)),
2955 2955 node=node,
2956 2956 )
2957 2957 state[b'skipread'].add(node)
2958 2958
2959 2959 def storageinfo(
2960 2960 self,
2961 2961 exclusivefiles=False,
2962 2962 sharedfiles=False,
2963 2963 revisionscount=False,
2964 2964 trackedsize=False,
2965 2965 storedsize=False,
2966 2966 ):
2967 2967 d = {}
2968 2968
2969 2969 if exclusivefiles:
2970 2970 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2971 2971 if not self._inline:
2972 2972 d[b'exclusivefiles'].append((self.opener, self.datafile))
2973 2973
2974 2974 if sharedfiles:
2975 2975 d[b'sharedfiles'] = []
2976 2976
2977 2977 if revisionscount:
2978 2978 d[b'revisionscount'] = len(self)
2979 2979
2980 2980 if trackedsize:
2981 2981 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2982 2982
2983 2983 if storedsize:
2984 2984 d[b'storedsize'] = sum(
2985 2985 self.opener.stat(path).st_size for path in self.files()
2986 2986 )
2987 2987
2988 2988 return d
@@ -1,14 +1,8
1 1 # mercurial.revlogutils -- basic utilities for revlog
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 from .. import error
10
11
12 class NodeMap(dict):
13 def __missing__(self, x):
14 raise error.RevlogError(b'unknown node: %s' % x)
General Comments 0
You need to be logged in to leave comments. Login now