revlog: deprecate direct `nodemap` access...
marmoute | r43974:02802fa8 default
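The change itself: each `nodemap` attribute on the index classes and on `revlog` becomes a deprecated property that warns and forwards to a private `_nodemap` cache, and callers are pointed at the explicit `index.has_node` / `index.rev` / `index.get_rev` methods. A minimal migration sketch, assuming `rl` is an already-loaded revlog instance (a hypothetical variable, for illustration only):

    def lookup(rl, node):
        """node -> rev lookup via the non-deprecated index API."""
        # Deprecated spelling, now warns through util.nouideprecwarn:
        #   rev = rl.nodemap.get(node)
        if rl.index.has_node(node):
            return rl.index.rev(node)   # raises RevlogError if unknown
        return None                     # same result as rl.index.get_rev(node)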
@@ -1,229 +1,235 @@ parsers.py
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid, nullrev
14 14 from .. import (
15 15 pycompat,
16 16 revlogutils,
17 17 util,
18 18 )
19 19
20 20 stringio = pycompat.bytesio
21 21
22 22
23 23 _pack = struct.pack
24 24 _unpack = struct.unpack
25 25 _compress = zlib.compress
26 26 _decompress = zlib.decompress
27 27
28 28 # Some code below makes tuples directly because it's more convenient. However,
29 29 # code outside this module should always use dirstatetuple.
30 30 def dirstatetuple(*x):
31 31 # x is a tuple
32 32 return x
33 33
34 34
35 35 indexformatng = b">Qiiiiii20s12x"
36 36 indexfirst = struct.calcsize(b'Q')
37 37 sizeint = struct.calcsize(b'i')
38 38 indexsize = struct.calcsize(indexformatng)
39 39
40 40
41 41 def gettype(q):
42 42 return int(q & 0xFFFF)
43 43
44 44
45 45 def offset_type(offset, type):
46 46 return int(int(offset) << 16 | type)
47 47
48 48
49 49 class BaseIndexObject(object):
50 @property
51 def nodemap(self):
52 msg = "index.nodemap is deprecated, " "use index.[has_node|rev|get_rev]"
53 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
54 return self._nodemap
55
50 56 @util.propertycache
51 def nodemap(self):
57 def _nodemap(self):
52 58 nodemap = revlogutils.NodeMap({nullid: nullrev})
53 59 for r in range(0, len(self)):
54 60 n = self[r][7]
55 61 nodemap[n] = r
56 62 return nodemap
57 63
58 64 def has_node(self, node):
59 65 """return True if the node exist in the index"""
60 return node in self.nodemap
66 return node in self._nodemap
61 67
62 68 def rev(self, node):
63 69 """return a revision for a node
64 70
65 71 If the node is unknown, raise a RevlogError"""
66 return self.nodemap[node]
72 return self._nodemap[node]
67 73
68 74 def get_rev(self, node):
69 75 """return a revision for a node
70 76
71 77 If the node is unknown, return None"""
72 return self.nodemap.get(node)
78 return self._nodemap.get(node)
73 79
74 80 def _stripnodes(self, start):
75 if 'nodemap' in vars(self):
81 if '_nodemap' in vars(self):
76 82 for r in range(start, len(self)):
77 83 n = self[r][7]
78 del self.nodemap[n]
84 del self._nodemap[n]
79 85
80 86 def clearcaches(self):
81 self.__dict__.pop('nodemap', None)
87 self.__dict__.pop('_nodemap', None)
82 88
83 89 def __len__(self):
84 90 return self._lgt + len(self._extra)
85 91
86 92 def append(self, tup):
87 if 'nodemap' in vars(self):
88 self.nodemap[tup[7]] = len(self)
93 if '_nodemap' in vars(self):
94 self._nodemap[tup[7]] = len(self)
89 95 self._extra.append(tup)
90 96
91 97 def _check_index(self, i):
92 98 if not isinstance(i, int):
93 99 raise TypeError(b"expecting int indexes")
94 100 if i < 0 or i >= len(self):
95 101 raise IndexError
96 102
97 103 def __getitem__(self, i):
98 104 if i == -1:
99 105 return (0, 0, 0, -1, -1, -1, -1, nullid)
100 106 self._check_index(i)
101 107 if i >= self._lgt:
102 108 return self._extra[i - self._lgt]
103 109 index = self._calculate_index(i)
104 110 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
105 111 if i == 0:
106 112 e = list(r)
107 113 type = gettype(e[0])
108 114 e[0] = offset_type(0, type)
109 115 return tuple(e)
110 116 return r
111 117
112 118
113 119 class IndexObject(BaseIndexObject):
114 120 def __init__(self, data):
115 121 assert len(data) % indexsize == 0
116 122 self._data = data
117 123 self._lgt = len(data) // indexsize
118 124 self._extra = []
119 125
120 126 def _calculate_index(self, i):
121 127 return i * indexsize
122 128
123 129 def __delitem__(self, i):
124 130 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
125 131 raise ValueError(b"deleting slices only supports a:-1 with step 1")
126 132 i = i.start
127 133 self._check_index(i)
128 134 self._stripnodes(i)
129 135 if i < self._lgt:
130 136 self._data = self._data[: i * indexsize]
131 137 self._lgt = i
132 138 self._extra = []
133 139 else:
134 140 self._extra = self._extra[: i - self._lgt]
135 141
136 142
137 143 class InlinedIndexObject(BaseIndexObject):
138 144 def __init__(self, data, inline=0):
139 145 self._data = data
140 146 self._lgt = self._inline_scan(None)
141 147 self._inline_scan(self._lgt)
142 148 self._extra = []
143 149
144 150 def _inline_scan(self, lgt):
145 151 off = 0
146 152 if lgt is not None:
147 153 self._offsets = [0] * lgt
148 154 count = 0
149 155 while off <= len(self._data) - indexsize:
150 156 (s,) = struct.unpack(
151 157 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
152 158 )
153 159 if lgt is not None:
154 160 self._offsets[count] = off
155 161 count += 1
156 162 off += indexsize + s
157 163 if off != len(self._data):
158 164 raise ValueError(b"corrupted data")
159 165 return count
160 166
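Note on `_inline_scan` above: in an inline revlog the index records and the compressed revision data are interleaved in a single file, so the scan must hop over each record's data payload. It reads the compressed-length field (a big-endian `>i` directly after the 8-byte offset/flags word) and advances by `indexsize + s`. A self-contained sketch of the same walk over synthetic entries (toy data, not a real revlog):

    import struct

    indexformatng = b">Qiiiiii20s12x"
    indexsize = struct.calcsize(indexformatng)   # 64 bytes per record
    indexfirst = struct.calcsize(b'Q')           # 8
    sizeint = struct.calcsize(b'i')              # 4

    def inline_offsets(data):
        """Yield the byte offset of each index record in inline data."""
        off = 0
        while off <= len(data) - indexsize:
            yield off
            # compressed data length sits right after the offset/flags word
            (s,) = struct.unpack(
                b'>i', data[off + indexfirst : off + indexfirst + sizeint]
            )
            off += indexsize + s

    # two records, the first followed by 5 bytes of "compressed" data
    e1 = struct.pack(indexformatng, 0, 5, 5, 0, 0, -1, -1, b'\0' * 20)
    e2 = struct.pack(indexformatng, 0, 0, 0, 1, 1, 0, -1, b'\1' * 20)
    assert list(inline_offsets(e1 + b'xxxxx' + e2)) == [0, indexsize + 5]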
161 167 def __delitem__(self, i):
162 168 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
163 169 raise ValueError(b"deleting slices only supports a:-1 with step 1")
164 170 i = i.start
165 171 self._check_index(i)
166 172 self._stripnodes(i)
167 173 if i < self._lgt:
168 174 self._offsets = self._offsets[:i]
169 175 self._lgt = i
170 176 self._extra = []
171 177 else:
172 178 self._extra = self._extra[: i - self._lgt]
173 179
174 180 def _calculate_index(self, i):
175 181 return self._offsets[i]
176 182
177 183
178 184 def parse_index2(data, inline):
179 185 if not inline:
180 186 return IndexObject(data), None
181 187 return InlinedIndexObject(data, inline), (0, data)
182 188
183 189
184 190 def parse_dirstate(dmap, copymap, st):
185 191 parents = [st[:20], st[20:40]]
186 192 # dereference fields so they will be local in loop
187 193 format = b">cllll"
188 194 e_size = struct.calcsize(format)
189 195 pos1 = 40
190 196 l = len(st)
191 197
192 198 # the inner loop
193 199 while pos1 < l:
194 200 pos2 = pos1 + e_size
195 201 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
196 202 pos1 = pos2 + e[4]
197 203 f = st[pos2:pos1]
198 204 if b'\0' in f:
199 205 f, c = f.split(b'\0')
200 206 copymap[f] = c
201 207 dmap[f] = e[:4]
202 208 return parents
203 209
204 210
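Aside on the entry format `parse_dirstate` walks: each record is a fixed `>cllll` header (state byte, mode, size, mtime, filename length) followed by the filename, with an optional copy source appended after a NUL, which is what the `split(b'\0')` above recovers. A round-trip sketch with made-up values:

    import struct

    fmt = b">cllll"
    e_size = struct.calcsize(fmt)   # 17 bytes of fixed header

    def pack_entry(state, mode, size, mtime, fname, copysource=None):
        if copysource:
            fname = fname + b"\0" + copysource
        return struct.pack(fmt, state, mode, size, mtime, len(fname)) + fname

    raw = pack_entry(b'n', 0o644, 12, 0, b'a.txt', copysource=b'b.txt')
    state, mode, size, mtime, flen = struct.unpack(fmt, raw[:e_size])
    assert raw[e_size:e_size + flen].split(b'\0') == [b'a.txt', b'b.txt']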
205 211 def pack_dirstate(dmap, copymap, pl, now):
206 212 now = int(now)
207 213 cs = stringio()
208 214 write = cs.write
209 215 write(b"".join(pl))
210 216 for f, e in pycompat.iteritems(dmap):
211 217 if e[0] == b'n' and e[3] == now:
212 218 # The file was last modified "simultaneously" with the current
213 219 # write to dirstate (i.e. within the same second for file-
214 220 # systems with a granularity of 1 sec). This commonly happens
215 221 # for at least a couple of files on 'update'.
216 222 # The user could change the file without changing its size
217 223 # within the same second. Invalidate the file's mtime in
218 224 # dirstate, forcing future 'status' calls to compare the
219 225 # contents of the file if the size is the same. This prevents
220 226 # mistakenly treating such files as clean.
221 227 e = dirstatetuple(e[0], e[1], e[2], -1)
222 228 dmap[f] = e
223 229
224 230 if f in copymap:
225 231 f = b"%s\0%s" % (f, copymap[f])
226 232 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
227 233 write(e)
228 234 write(f)
229 235 return cs.getvalue()
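Why the shim at the top of this file works: the public `nodemap` property warns and forwards to `_nodemap`, a `util.propertycache` that computes the mapping once and stashes it in the instance `__dict__`; that is also why `append`, `_stripnodes` and `clearcaches` test or pop the `'_nodemap'` key directly. A standalone sketch of the pattern in plain Python, with `warnings` standing in for `util.nouideprecwarn` and a simplified `propertycache`:

    import warnings

    class propertycache(object):
        """Compute once, then cache in the instance __dict__ (simplified)."""
        def __init__(self, func):
            self.func = func
            self.name = func.__name__

        def __get__(self, obj, objtype=None):
            if obj is None:
                return self
            value = self.func(obj)
            obj.__dict__[self.name] = value  # later reads bypass the descriptor
            return value

    class Index(object):
        def __init__(self, entries):
            self._entries = entries          # list of (..., node) tuples

        @property
        def nodemap(self):
            warnings.warn("index.nodemap is deprecated, "
                          "use index.[has_node|rev|get_rev]",
                          DeprecationWarning, stacklevel=2)
            return self._nodemap

        @propertycache
        def _nodemap(self):
            return {e[-1]: r for r, e in enumerate(self._entries)}

        def has_node(self, node):
            return node in self._nodemap     # no warning on the new API

        def clearcaches(self):
            self.__dict__.pop('_nodemap', None)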
@@ -1,2972 +1,2982 @@ revlog.py
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 revlogutils,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the names to prevent pyflakes warnings
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109
110 110 # Aliased for performance.
111 111 _zlibdecompress = zlib.decompress
112 112
113 113 # max size of revlog with inline data
114 114 _maxinline = 131072
115 115 _chunksize = 1048576
116 116
117 117 # Flag processors for REVIDX_ELLIPSIS.
118 118 def ellipsisreadprocessor(rl, text):
119 119 return text, False, {}
120 120
121 121
122 122 def ellipsiswriteprocessor(rl, text, sidedata):
123 123 return text, False
124 124
125 125
126 126 def ellipsisrawprocessor(rl, text):
127 127 return False
128 128
129 129
130 130 ellipsisprocessor = (
131 131 ellipsisreadprocessor,
132 132 ellipsiswriteprocessor,
133 133 ellipsisrawprocessor,
134 134 )
135 135
136 136
137 137 def getoffset(q):
138 138 return int(q >> 16)
139 139
140 140
141 141 def gettype(q):
142 142 return int(q & 0xFFFF)
143 143
144 144
145 145 def offset_type(offset, type):
146 146 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
147 147 raise ValueError(b'unknown revlog index flags')
148 148 return int(int(offset) << 16 | type)
149 149
150 150
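The first index field packs the data offset into the high 48 bits and the revision flags into the low 16, so `getoffset` and `gettype` are exact inverses of `offset_type`. A quick self-check (flag validation elided; values arbitrary):

    def offset_type(offset, type):
        return int(int(offset) << 16 | type)

    def getoffset(q):
        return int(q >> 16)

    def gettype(q):
        return int(q & 0xFFFF)

    packed = offset_type(123456, 0x0001)
    assert getoffset(packed) == 123456
    assert gettype(packed) == 0x0001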
151 151 @attr.s(slots=True, frozen=True)
152 152 class _revisioninfo(object):
153 153 """Information about a revision that allows building its fulltext
154 154 node: expected hash of the revision
155 155 p1, p2: parent revs of the revision
156 156 btext: built text cache consisting of a one-element list
157 157 cachedelta: (baserev, uncompressed_delta) or None
158 158 flags: flags associated to the revision storage
159 159
160 160 One of btext[0] or cachedelta must be set.
161 161 """
162 162
163 163 node = attr.ib()
164 164 p1 = attr.ib()
165 165 p2 = attr.ib()
166 166 btext = attr.ib()
167 167 textlen = attr.ib()
168 168 cachedelta = attr.ib()
169 169 flags = attr.ib()
170 170
171 171
172 172 @interfaceutil.implementer(repository.irevisiondelta)
173 173 @attr.s(slots=True)
174 174 class revlogrevisiondelta(object):
175 175 node = attr.ib()
176 176 p1node = attr.ib()
177 177 p2node = attr.ib()
178 178 basenode = attr.ib()
179 179 flags = attr.ib()
180 180 baserevisionsize = attr.ib()
181 181 revision = attr.ib()
182 182 delta = attr.ib()
183 183 linknode = attr.ib(default=None)
184 184
185 185
186 186 @interfaceutil.implementer(repository.iverifyproblem)
187 187 @attr.s(frozen=True)
188 188 class revlogproblem(object):
189 189 warning = attr.ib(default=None)
190 190 error = attr.ib(default=None)
191 191 node = attr.ib(default=None)
192 192
193 193
194 194 # index v0:
195 195 # 4 bytes: offset
196 196 # 4 bytes: compressed length
197 197 # 4 bytes: base rev
198 198 # 4 bytes: link rev
199 199 # 20 bytes: parent 1 nodeid
200 200 # 20 bytes: parent 2 nodeid
201 201 # 20 bytes: nodeid
202 202 indexformatv0 = struct.Struct(b">4l20s20s20s")
203 203 indexformatv0_pack = indexformatv0.pack
204 204 indexformatv0_unpack = indexformatv0.unpack
205 205
206 206
207 207 class revlogoldindex(list):
208 @property
209 def nodemap(self):
210 msg = "index.nodemap is deprecated, " "use index.[has_node|rev|get_rev]"
211 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
212 return self._nodemap
213
208 214 @util.propertycache
209 def nodemap(self):
215 def _nodemap(self):
210 216 nodemap = revlogutils.NodeMap({nullid: nullrev})
211 217 for r in range(0, len(self)):
212 218 n = self[r][7]
213 219 nodemap[n] = r
214 220 return nodemap
215 221
216 222 def has_node(self, node):
217 223 """return True if the node exist in the index"""
218 return node in self.nodemap
224 return node in self._nodemap
219 225
220 226 def rev(self, node):
221 227 """return a revision for a node
222 228
223 229 If the node is unknown, raise a RevlogError"""
224 return self.nodemap[node]
230 return self._nodemap[node]
225 231
226 232 def get_rev(self, node):
227 233 """return a revision for a node
228 234
229 235 If the node is unknown, return None"""
230 return self.nodemap.get(node)
236 return self._nodemap.get(node)
231 237
232 238 def append(self, tup):
233 self.nodemap[tup[7]] = len(self)
239 self._nodemap[tup[7]] = len(self)
234 240 super(revlogoldindex, self).append(tup)
235 241
236 242 def __delitem__(self, i):
237 243 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
238 244 raise ValueError(b"deleting slices only supports a:-1 with step 1")
239 245 for r in pycompat.xrange(i.start, len(self)):
240 del self.nodemap[self[r][7]]
246 del self._nodemap[self[r][7]]
241 247 super(revlogoldindex, self).__delitem__(i)
242 248
243 249 def clearcaches(self):
244 self.__dict__.pop('nodemap', None)
250 self.__dict__.pop('_nodemap', None)
245 251
246 252 def __getitem__(self, i):
247 253 if i == -1:
248 254 return (0, 0, 0, -1, -1, -1, -1, nullid)
249 255 return list.__getitem__(self, i)
250 256
251 257
252 258 class revlogoldio(object):
253 259 def __init__(self):
254 260 self.size = indexformatv0.size
255 261
256 262 def parseindex(self, data, inline):
257 263 s = self.size
258 264 index = []
259 265 nodemap = revlogutils.NodeMap({nullid: nullrev})
260 266 n = off = 0
261 267 l = len(data)
262 268 while off + s <= l:
263 269 cur = data[off : off + s]
264 270 off += s
265 271 e = indexformatv0_unpack(cur)
266 272 # transform to revlogv1 format
267 273 e2 = (
268 274 offset_type(e[0], 0),
269 275 e[1],
270 276 -1,
271 277 e[2],
272 278 e[3],
273 279 nodemap.get(e[4], nullrev),
274 280 nodemap.get(e[5], nullrev),
275 281 e[6],
276 282 )
277 283 index.append(e2)
278 284 nodemap[e[6]] = n
279 285 n += 1
280 286
281 287 index = revlogoldindex(index)
282 288 return index, None
283 289
284 290 def packentry(self, entry, node, version, rev):
285 291 if gettype(entry[0]):
286 292 raise error.RevlogError(
287 293 _(b'index entry flags need revlog version 1')
288 294 )
289 295 e2 = (
290 296 getoffset(entry[0]),
291 297 entry[1],
292 298 entry[3],
293 299 entry[4],
294 300 node(entry[5]),
295 301 node(entry[6]),
296 302 entry[7],
297 303 )
298 304 return indexformatv0_pack(*e2)
299 305
300 306
301 307 # index ng:
302 308 # 6 bytes: offset
303 309 # 2 bytes: flags
304 310 # 4 bytes: compressed length
305 311 # 4 bytes: uncompressed length
306 312 # 4 bytes: base rev
307 313 # 4 bytes: link rev
308 314 # 4 bytes: parent 1 rev
309 315 # 4 bytes: parent 2 rev
310 316 # 32 bytes: nodeid
311 317 indexformatng = struct.Struct(b">Qiiiiii20s12x")
312 318 indexformatng_pack = indexformatng.pack
313 319 versionformat = struct.Struct(b">I")
314 320 versionformat_pack = versionformat.pack
315 321 versionformat_unpack = versionformat.unpack
316 322
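Each 'ng' index record is 64 bytes: one unsigned 64-bit offset/flags word, six signed 32-bit integers, and the 32 bytes the layout comment reserves for the nodeid (a 20-byte hash plus 12 bytes of padding). A pack/unpack round trip with dummy values:

    import struct

    indexformatng = struct.Struct(b">Qiiiiii20s12x")
    assert indexformatng.size == 64

    # (offset_flags, comp_len, uncomp_len, base_rev, link_rev, p1, p2, node)
    entry = (0, 11, 25, 0, 0, -1, -1, b'\x01' * 20)
    assert indexformatng.unpack(indexformatng.pack(*entry)) == entry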
317 323 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
318 324 # signed integer)
319 325 _maxentrysize = 0x7FFFFFFF
320 326
321 327
322 328 class revlogio(object):
323 329 def __init__(self):
324 330 self.size = indexformatng.size
325 331
326 332 def parseindex(self, data, inline):
327 333 # call the C implementation to parse the index data
328 334 index, cache = parsers.parse_index2(data, inline)
329 335 return index, cache
330 336
331 337 def packentry(self, entry, node, version, rev):
332 338 p = indexformatng_pack(*entry)
333 339 if rev == 0:
334 340 p = versionformat_pack(version) + p[4:]
335 341 return p
336 342
337 343
338 344 class revlog(object):
339 345 """
340 346 the underlying revision storage object
341 347
342 348 A revlog consists of two parts, an index and the revision data.
343 349
344 350 The index is a file with a fixed record size containing
345 351 information on each revision, including its nodeid (hash), the
346 352 nodeids of its parents, the position and offset of its data within
347 353 the data file, and the revision it's based on. Finally, each entry
348 354 contains a linkrev entry that can serve as a pointer to external
349 355 data.
350 356
351 357 The revision data itself is a linear collection of data chunks.
352 358 Each chunk represents a revision and is usually represented as a
353 359 delta against the previous chunk. To bound lookup time, runs of
354 360 deltas are limited to about 2 times the length of the original
355 361 version data. This makes retrieval of a version proportional to
356 362 its size, or O(1) relative to the number of revisions.
357 363
358 364 Both pieces of the revlog are written to in an append-only
359 365 fashion, which means we never need to rewrite a file to insert or
360 366 remove data, and can use some simple techniques to avoid the need
361 367 for locking while reading.
362 368
363 369 If checkambig, indexfile is opened with checkambig=True at
364 370 writing, to avoid file stat ambiguity.
365 371
366 372 If mmaplargeindex is True, and an mmapindexthreshold is set, the
367 373 index will be mmapped rather than read if it is larger than the
368 374 configured threshold.
369 375
370 376 If censorable is True, the revlog can have censored revisions.
371 377
372 378 If `upperboundcomp` is not None, this is the expected maximal gain from
373 379 compression for the data content.
374 380 """
375 381
376 382 _flagserrorclass = error.RevlogError
377 383
378 384 def __init__(
379 385 self,
380 386 opener,
381 387 indexfile,
382 388 datafile=None,
383 389 checkambig=False,
384 390 mmaplargeindex=False,
385 391 censorable=False,
386 392 upperboundcomp=None,
387 393 ):
388 394 """
389 395 create a revlog object
390 396
391 397 opener is a function that abstracts the file opening operation
392 398 and can be used to implement COW semantics or the like.
393 399
394 400 """
395 401 self.upperboundcomp = upperboundcomp
396 402 self.indexfile = indexfile
397 403 self.datafile = datafile or (indexfile[:-2] + b".d")
398 404 self.opener = opener
399 405 # When True, indexfile is opened with checkambig=True at writing, to
400 406 # avoid file stat ambiguity.
401 407 self._checkambig = checkambig
402 408 self._mmaplargeindex = mmaplargeindex
403 409 self._censorable = censorable
404 410 # 3-tuple of (node, rev, text) for a raw revision.
405 411 self._revisioncache = None
406 412 # Maps rev to chain base rev.
407 413 self._chainbasecache = util.lrucachedict(100)
408 414 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
409 415 self._chunkcache = (0, b'')
410 416 # How much data to read and cache into the raw revlog data cache.
411 417 self._chunkcachesize = 65536
412 418 self._maxchainlen = None
413 419 self._deltabothparents = True
414 420 self.index = None
415 421 # Mapping of partial identifiers to full nodes.
416 422 self._pcache = {}
417 423 # Mapping of revision integer to full node.
418 424 self._nodepos = None
419 425 self._compengine = b'zlib'
420 426 self._compengineopts = {}
421 427 self._maxdeltachainspan = -1
422 428 self._withsparseread = False
423 429 self._sparserevlog = False
424 430 self._srdensitythreshold = 0.50
425 431 self._srmingapsize = 262144
426 432
427 433 # Make copy of flag processors so each revlog instance can support
428 434 # custom flags.
429 435 self._flagprocessors = dict(flagutil.flagprocessors)
430 436
431 437 # 2-tuple of file handles being used for active writing.
432 438 self._writinghandles = None
433 439
434 440 self._loadindex()
435 441
436 442 def _loadindex(self):
437 443 mmapindexthreshold = None
438 444 opts = self.opener.options
439 445
440 446 if b'revlogv2' in opts:
441 447 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
442 448 elif b'revlogv1' in opts:
443 449 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
444 450 if b'generaldelta' in opts:
445 451 newversionflags |= FLAG_GENERALDELTA
446 452 elif b'revlogv0' in self.opener.options:
447 453 newversionflags = REVLOGV0
448 454 else:
449 455 newversionflags = REVLOG_DEFAULT_VERSION
450 456
451 457 if b'chunkcachesize' in opts:
452 458 self._chunkcachesize = opts[b'chunkcachesize']
453 459 if b'maxchainlen' in opts:
454 460 self._maxchainlen = opts[b'maxchainlen']
455 461 if b'deltabothparents' in opts:
456 462 self._deltabothparents = opts[b'deltabothparents']
457 463 self._lazydelta = bool(opts.get(b'lazydelta', True))
458 464 self._lazydeltabase = False
459 465 if self._lazydelta:
460 466 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
461 467 if b'compengine' in opts:
462 468 self._compengine = opts[b'compengine']
463 469 if b'zlib.level' in opts:
464 470 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
465 471 if b'zstd.level' in opts:
466 472 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
467 473 if b'maxdeltachainspan' in opts:
468 474 self._maxdeltachainspan = opts[b'maxdeltachainspan']
469 475 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
470 476 mmapindexthreshold = opts[b'mmapindexthreshold']
471 477 self.hassidedata = bool(opts.get(b'side-data', False))
472 478 if self.hassidedata:
473 479 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
474 480 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
475 481 withsparseread = bool(opts.get(b'with-sparse-read', False))
476 482 # sparse-revlog forces sparse-read
477 483 self._withsparseread = self._sparserevlog or withsparseread
478 484 if b'sparse-read-density-threshold' in opts:
479 485 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
480 486 if b'sparse-read-min-gap-size' in opts:
481 487 self._srmingapsize = opts[b'sparse-read-min-gap-size']
482 488 if opts.get(b'enableellipsis'):
483 489 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
484 490
485 491 # revlog v0 doesn't have flag processors
486 492 for flag, processor in pycompat.iteritems(
487 493 opts.get(b'flagprocessors', {})
488 494 ):
489 495 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
490 496
491 497 if self._chunkcachesize <= 0:
492 498 raise error.RevlogError(
493 499 _(b'revlog chunk cache size %r is not greater than 0')
494 500 % self._chunkcachesize
495 501 )
496 502 elif self._chunkcachesize & (self._chunkcachesize - 1):
497 503 raise error.RevlogError(
498 504 _(b'revlog chunk cache size %r is not a power of 2')
499 505 % self._chunkcachesize
500 506 )
501 507
502 508 indexdata = b''
503 509 self._initempty = True
504 510 try:
505 511 with self._indexfp() as f:
506 512 if (
507 513 mmapindexthreshold is not None
508 514 and self.opener.fstat(f).st_size >= mmapindexthreshold
509 515 ):
510 516 # TODO: should .close() to release resources without
511 517 # relying on Python GC
512 518 indexdata = util.buffer(util.mmapread(f))
513 519 else:
514 520 indexdata = f.read()
515 521 if len(indexdata) > 0:
516 522 versionflags = versionformat_unpack(indexdata[:4])[0]
517 523 self._initempty = False
518 524 else:
519 525 versionflags = newversionflags
520 526 except IOError as inst:
521 527 if inst.errno != errno.ENOENT:
522 528 raise
523 529
524 530 versionflags = newversionflags
525 531
526 532 self.version = versionflags
527 533
528 534 flags = versionflags & ~0xFFFF
529 535 fmt = versionflags & 0xFFFF
530 536
531 537 if fmt == REVLOGV0:
532 538 if flags:
533 539 raise error.RevlogError(
534 540 _(b'unknown flags (%#04x) in version %d revlog %s')
535 541 % (flags >> 16, fmt, self.indexfile)
536 542 )
537 543
538 544 self._inline = False
539 545 self._generaldelta = False
540 546
541 547 elif fmt == REVLOGV1:
542 548 if flags & ~REVLOGV1_FLAGS:
543 549 raise error.RevlogError(
544 550 _(b'unknown flags (%#04x) in version %d revlog %s')
545 551 % (flags >> 16, fmt, self.indexfile)
546 552 )
547 553
548 554 self._inline = versionflags & FLAG_INLINE_DATA
549 555 self._generaldelta = versionflags & FLAG_GENERALDELTA
550 556
551 557 elif fmt == REVLOGV2:
552 558 if flags & ~REVLOGV2_FLAGS:
553 559 raise error.RevlogError(
554 560 _(b'unknown flags (%#04x) in version %d revlog %s')
555 561 % (flags >> 16, fmt, self.indexfile)
556 562 )
557 563
558 564 self._inline = versionflags & FLAG_INLINE_DATA
559 565 # generaldelta implied by version 2 revlogs.
560 566 self._generaldelta = True
561 567
562 568 else:
563 569 raise error.RevlogError(
564 570 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
565 571 )
566 572 # sparse-revlog can't be on without general-delta (issue6056)
567 573 if not self._generaldelta:
568 574 self._sparserevlog = False
569 575
570 576 self._storedeltachains = True
571 577
572 578 self._io = revlogio()
573 579 if self.version == REVLOGV0:
574 580 self._io = revlogoldio()
575 581 try:
576 582 d = self._io.parseindex(indexdata, self._inline)
577 583 except (ValueError, IndexError):
578 584 raise error.RevlogError(
579 585 _(b"index %s is corrupted") % self.indexfile
580 586 )
581 587 self.index, self._chunkcache = d
582 self.nodemap = self.index.nodemap
583 588 if not self._chunkcache:
584 589 self._chunkclear()
585 590 # revnum -> (chain-length, sum-delta-length)
586 591 self._chaininfocache = {}
587 592 # revlog header -> revlog compressor
588 593 self._decompressors = {}
589 594
590 595 @util.propertycache
591 596 def _compressor(self):
592 597 engine = util.compengines[self._compengine]
593 598 return engine.revlogcompressor(self._compengineopts)
594 599
595 600 def _indexfp(self, mode=b'r'):
596 601 """file object for the revlog's index file"""
597 602 args = {'mode': mode}
598 603 if mode != b'r':
599 604 args['checkambig'] = self._checkambig
600 605 if mode == b'w':
601 606 args['atomictemp'] = True
602 607 return self.opener(self.indexfile, **args)
603 608
604 609 def _datafp(self, mode=b'r'):
605 610 """file object for the revlog's data file"""
606 611 return self.opener(self.datafile, mode=mode)
607 612
608 613 @contextlib.contextmanager
609 614 def _datareadfp(self, existingfp=None):
610 615 """file object suitable to read data"""
611 616 # Use explicit file handle, if given.
612 617 if existingfp is not None:
613 618 yield existingfp
614 619
615 620 # Use a file handle being actively used for writes, if available.
616 621 # There is some danger to doing this because reads will seek the
617 622 # file. However, _writeentry() performs a SEEK_END before all writes,
618 623 # so we should be safe.
619 624 elif self._writinghandles:
620 625 if self._inline:
621 626 yield self._writinghandles[0]
622 627 else:
623 628 yield self._writinghandles[1]
624 629
625 630 # Otherwise open a new file handle.
626 631 else:
627 632 if self._inline:
628 633 func = self._indexfp
629 634 else:
630 635 func = self._datafp
631 636 with func() as fp:
632 637 yield fp
633 638
634 639 def tiprev(self):
635 640 return len(self.index) - 1
636 641
637 642 def tip(self):
638 643 return self.node(self.tiprev())
639 644
640 645 def __contains__(self, rev):
641 646 return 0 <= rev < len(self)
642 647
643 648 def __len__(self):
644 649 return len(self.index)
645 650
646 651 def __iter__(self):
647 652 return iter(pycompat.xrange(len(self)))
648 653
649 654 def revs(self, start=0, stop=None):
650 655 """iterate over all rev in this revlog (from start to stop)"""
651 656 return storageutil.iterrevs(len(self), start=start, stop=stop)
652 657
653 @util.propertycache
658 @property
654 659 def nodemap(self):
660 msg = (
661 "revlog.nodemap is deprecated, "
662 "use revlog.index.[has_node|rev|get_rev]"
663 )
664 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
655 665 return self.index.nodemap
656 666
657 667 @property
658 668 def _nodecache(self):
659 669 msg = "revlog._nodecache is deprecated, use revlog.index.nodemap"
660 670 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
661 671 return self.index.nodemap
662 672
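Extension code that has to run on releases from both sides of this change can feature-test for the new index methods instead of touching the deprecated attributes. A hedged compatibility sketch (`rl` is any revlog-like object; the `hasattr` probe is an assumption about older APIs, not something this patch provides):

    def get_rev_compat(rl, node):
        """Resolve node -> rev-or-None across old and new revlog APIs."""
        index = rl.index
        if hasattr(index, 'get_rev'):    # spelling introduced by this series
            return index.get_rev(node)
        return rl.nodemap.get(node)      # older releases: plain dict access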
663 673 def hasnode(self, node):
664 674 try:
665 675 self.rev(node)
666 676 return True
667 677 except KeyError:
668 678 return False
669 679
670 680 def candelta(self, baserev, rev):
671 681 """whether two revisions (baserev, rev) can be delta-ed or not"""
672 682 # Disable delta if either rev requires a content-changing flag
673 683 # processor (ex. LFS). This is because such flag processor can alter
674 684 # the rawtext content that the delta will be based on, and two clients
675 685 # could have a same revlog node with different flags (i.e. different
676 686 # rawtext contents) and the delta could be incompatible.
677 687 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
678 688 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
679 689 ):
680 690 return False
681 691 return True
682 692
683 693 def clearcaches(self):
684 694 self._revisioncache = None
685 695 self._chainbasecache.clear()
686 696 self._chunkcache = (0, b'')
687 697 self._pcache = {}
688 698 self.index.clearcaches()
689 699
690 700 def rev(self, node):
691 701 try:
692 702 return self.index.rev(node)
693 703 except TypeError:
694 704 raise
695 705 except error.RevlogError:
696 706 # parsers.c radix tree lookup failed
697 707 if node == wdirid or node in wdirfilenodeids:
698 708 raise error.WdirUnsupported
699 709 raise error.LookupError(node, self.indexfile, _(b'no node'))
700 710
701 711 # Accessors for index entries.
702 712
703 713 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
704 714 # are flags.
705 715 def start(self, rev):
706 716 return int(self.index[rev][0] >> 16)
707 717
708 718 def flags(self, rev):
709 719 return self.index[rev][0] & 0xFFFF
710 720
711 721 def length(self, rev):
712 722 return self.index[rev][1]
713 723
714 724 def rawsize(self, rev):
715 725 """return the length of the uncompressed text for a given revision"""
716 726 l = self.index[rev][2]
717 727 if l >= 0:
718 728 return l
719 729
720 730 t = self.rawdata(rev)
721 731 return len(t)
722 732
723 733 def size(self, rev):
724 734 """length of non-raw text (processed by a "read" flag processor)"""
725 735 # fast path: if no "read" flag processor could change the content,
726 736 # size is rawsize. note: ELLIPSIS is known to not change the content.
727 737 flags = self.flags(rev)
728 738 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
729 739 return self.rawsize(rev)
730 740
731 741 return len(self.revision(rev, raw=False))
732 742
733 743 def chainbase(self, rev):
734 744 base = self._chainbasecache.get(rev)
735 745 if base is not None:
736 746 return base
737 747
738 748 index = self.index
739 749 iterrev = rev
740 750 base = index[iterrev][3]
741 751 while base != iterrev:
742 752 iterrev = base
743 753 base = index[iterrev][3]
744 754
745 755 self._chainbasecache[rev] = base
746 756 return base
747 757
748 758 def linkrev(self, rev):
749 759 return self.index[rev][4]
750 760
751 761 def parentrevs(self, rev):
752 762 try:
753 763 entry = self.index[rev]
754 764 except IndexError:
755 765 if rev == wdirrev:
756 766 raise error.WdirUnsupported
757 767 raise
758 768
759 769 return entry[5], entry[6]
760 770
761 771 # fast parentrevs(rev) where rev isn't filtered
762 772 _uncheckedparentrevs = parentrevs
763 773
764 774 def node(self, rev):
765 775 try:
766 776 return self.index[rev][7]
767 777 except IndexError:
768 778 if rev == wdirrev:
769 779 raise error.WdirUnsupported
770 780 raise
771 781
772 782 # Derived from index values.
773 783
774 784 def end(self, rev):
775 785 return self.start(rev) + self.length(rev)
776 786
777 787 def parents(self, node):
778 788 i = self.index
779 789 d = i[self.rev(node)]
780 790 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
781 791
782 792 def chainlen(self, rev):
783 793 return self._chaininfo(rev)[0]
784 794
785 795 def _chaininfo(self, rev):
786 796 chaininfocache = self._chaininfocache
787 797 if rev in chaininfocache:
788 798 return chaininfocache[rev]
789 799 index = self.index
790 800 generaldelta = self._generaldelta
791 801 iterrev = rev
792 802 e = index[iterrev]
793 803 clen = 0
794 804 compresseddeltalen = 0
795 805 while iterrev != e[3]:
796 806 clen += 1
797 807 compresseddeltalen += e[1]
798 808 if generaldelta:
799 809 iterrev = e[3]
800 810 else:
801 811 iterrev -= 1
802 812 if iterrev in chaininfocache:
803 813 t = chaininfocache[iterrev]
804 814 clen += t[0]
805 815 compresseddeltalen += t[1]
806 816 break
807 817 e = index[iterrev]
808 818 else:
809 819 # Add text length of base since decompressing that also takes
810 820 # work. For cache hits the length is already included.
811 821 compresseddeltalen += e[1]
812 822 r = (clen, compresseddeltalen)
813 823 chaininfocache[rev] = r
814 824 return r
815 825
816 826 def _deltachain(self, rev, stoprev=None):
817 827 """Obtain the delta chain for a revision.
818 828
819 829 ``stoprev`` specifies a revision to stop at. If not specified, we
820 830 stop at the base of the chain.
821 831
822 832 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
823 833 revs in ascending order and ``stopped`` is a bool indicating whether
824 834 ``stoprev`` was hit.
825 835 """
826 836 # Try C implementation.
827 837 try:
828 838 return self.index.deltachain(rev, stoprev, self._generaldelta)
829 839 except AttributeError:
830 840 pass
831 841
832 842 chain = []
833 843
834 844 # Alias to prevent attribute lookup in tight loop.
835 845 index = self.index
836 846 generaldelta = self._generaldelta
837 847
838 848 iterrev = rev
839 849 e = index[iterrev]
840 850 while iterrev != e[3] and iterrev != stoprev:
841 851 chain.append(iterrev)
842 852 if generaldelta:
843 853 iterrev = e[3]
844 854 else:
845 855 iterrev -= 1
846 856 e = index[iterrev]
847 857
848 858 if iterrev == stoprev:
849 859 stopped = True
850 860 else:
851 861 chain.append(iterrev)
852 862 stopped = False
853 863
854 864 chain.reverse()
855 865 return chain, stopped
856 866
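As the docstring says, `_deltachain` returns the revisions whose deltas must be applied, in ascending order, to rebuild a fulltext. A toy illustration of consuming such a chain, with deltas modeled as callables purely to show the fold (real revlogs apply mdiff binary patches):

    def resolve(fulltexts, deltas, chain):
        """Rebuild the text for chain[-1] from its snapshot base."""
        text = fulltexts[chain[0]]       # chain starts at a full snapshot
        for rev in chain[1:]:
            text = deltas[rev](text)     # apply each delta in order
        return text

    fulltexts = {0: b'base'}
    deltas = {1: lambda t: t + b'+r1', 2: lambda t: t + b'+r2'}
    assert resolve(fulltexts, deltas, [0, 1, 2]) == b'base+r1+r2'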
857 867 def ancestors(self, revs, stoprev=0, inclusive=False):
858 868 """Generate the ancestors of 'revs' in reverse revision order.
859 869 Does not generate revs lower than stoprev.
860 870
861 871 See the documentation for ancestor.lazyancestors for more details."""
862 872
863 873 # first, make sure start revisions aren't filtered
864 874 revs = list(revs)
865 875 checkrev = self.node
866 876 for r in revs:
867 877 checkrev(r)
868 878 # and we're sure ancestors aren't filtered as well
869 879
870 880 if rustancestor is not None:
871 881 lazyancestors = rustancestor.LazyAncestors
872 882 arg = self.index
873 883 elif util.safehasattr(parsers, b'rustlazyancestors'):
874 884 lazyancestors = ancestor.rustlazyancestors
875 885 arg = self.index
876 886 else:
877 887 lazyancestors = ancestor.lazyancestors
878 888 arg = self._uncheckedparentrevs
879 889 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
880 890
881 891 def descendants(self, revs):
882 892 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
883 893
884 894 def findcommonmissing(self, common=None, heads=None):
885 895 """Return a tuple of the ancestors of common and the ancestors of heads
886 896 that are not ancestors of common. In revset terminology, we return the
887 897 tuple:
888 898
889 899 ::common, (::heads) - (::common)
890 900
891 901 The list is sorted by revision number, meaning it is
892 902 topologically sorted.
893 903
894 904 'heads' and 'common' are both lists of node IDs. If heads is
895 905 not supplied, uses all of the revlog's heads. If common is not
896 906 supplied, uses nullid."""
897 907 if common is None:
898 908 common = [nullid]
899 909 if heads is None:
900 910 heads = self.heads()
901 911
902 912 common = [self.rev(n) for n in common]
903 913 heads = [self.rev(n) for n in heads]
904 914
905 915 # we want the ancestors, but inclusive
906 916 class lazyset(object):
907 917 def __init__(self, lazyvalues):
908 918 self.addedvalues = set()
909 919 self.lazyvalues = lazyvalues
910 920
911 921 def __contains__(self, value):
912 922 return value in self.addedvalues or value in self.lazyvalues
913 923
914 924 def __iter__(self):
915 925 added = self.addedvalues
916 926 for r in added:
917 927 yield r
918 928 for r in self.lazyvalues:
919 929 if r not in added:
920 930 yield r
921 931
922 932 def add(self, value):
923 933 self.addedvalues.add(value)
924 934
925 935 def update(self, values):
926 936 self.addedvalues.update(values)
927 937
928 938 has = lazyset(self.ancestors(common))
929 939 has.add(nullrev)
930 940 has.update(common)
931 941
932 942 # take all ancestors from heads that aren't in has
933 943 missing = set()
934 944 visit = collections.deque(r for r in heads if r not in has)
935 945 while visit:
936 946 r = visit.popleft()
937 947 if r in missing:
938 948 continue
939 949 else:
940 950 missing.add(r)
941 951 for p in self.parentrevs(r):
942 952 if p not in has:
943 953 visit.append(p)
944 954 missing = list(missing)
945 955 missing.sort()
946 956 return has, [self.node(miss) for miss in missing]
947 957
948 958 def incrementalmissingrevs(self, common=None):
949 959 """Return an object that can be used to incrementally compute the
950 960 revision numbers of the ancestors of arbitrary sets that are not
951 961 ancestors of common. This is an ancestor.incrementalmissingancestors
952 962 object.
953 963
954 964 'common' is a list of revision numbers. If common is not supplied, uses
955 965 nullrev.
956 966 """
957 967 if common is None:
958 968 common = [nullrev]
959 969
960 970 if rustancestor is not None:
961 971 return rustancestor.MissingAncestors(self.index, common)
962 972 return ancestor.incrementalmissingancestors(self.parentrevs, common)
963 973
964 974 def findmissingrevs(self, common=None, heads=None):
965 975 """Return the revision numbers of the ancestors of heads that
966 976 are not ancestors of common.
967 977
968 978 More specifically, return a list of revision numbers corresponding to
969 979 nodes N such that every N satisfies the following constraints:
970 980
971 981 1. N is an ancestor of some node in 'heads'
972 982 2. N is not an ancestor of any node in 'common'
973 983
974 984 The list is sorted by revision number, meaning it is
975 985 topologically sorted.
976 986
977 987 'heads' and 'common' are both lists of revision numbers. If heads is
978 988 not supplied, uses all of the revlog's heads. If common is not
979 989 supplied, uses nullid."""
980 990 if common is None:
981 991 common = [nullrev]
982 992 if heads is None:
983 993 heads = self.headrevs()
984 994
985 995 inc = self.incrementalmissingrevs(common=common)
986 996 return inc.missingancestors(heads)
987 997
988 998 def findmissing(self, common=None, heads=None):
989 999 """Return the ancestors of heads that are not ancestors of common.
990 1000
991 1001 More specifically, return a list of nodes N such that every N
992 1002 satisfies the following constraints:
993 1003
994 1004 1. N is an ancestor of some node in 'heads'
995 1005 2. N is not an ancestor of any node in 'common'
996 1006
997 1007 The list is sorted by revision number, meaning it is
998 1008 topologically sorted.
999 1009
1000 1010 'heads' and 'common' are both lists of node IDs. If heads is
1001 1011 not supplied, uses all of the revlog's heads. If common is not
1002 1012 supplied, uses nullid."""
1003 1013 if common is None:
1004 1014 common = [nullid]
1005 1015 if heads is None:
1006 1016 heads = self.heads()
1007 1017
1008 1018 common = [self.rev(n) for n in common]
1009 1019 heads = [self.rev(n) for n in heads]
1010 1020
1011 1021 inc = self.incrementalmissingrevs(common=common)
1012 1022 return [self.node(r) for r in inc.missingancestors(heads)]
1013 1023
1014 1024 def nodesbetween(self, roots=None, heads=None):
1015 1025 """Return a topological path from 'roots' to 'heads'.
1016 1026
1017 1027 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1018 1028 topologically sorted list of all nodes N that satisfy both of
1019 1029 these constraints:
1020 1030
1021 1031 1. N is a descendant of some node in 'roots'
1022 1032 2. N is an ancestor of some node in 'heads'
1023 1033
1024 1034 Every node is considered to be both a descendant and an ancestor
1025 1035 of itself, so every reachable node in 'roots' and 'heads' will be
1026 1036 included in 'nodes'.
1027 1037
1028 1038 'outroots' is the list of reachable nodes in 'roots', i.e., the
1029 1039 subset of 'roots' that is returned in 'nodes'. Likewise,
1030 1040 'outheads' is the subset of 'heads' that is also in 'nodes'.
1031 1041
1032 1042 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1033 1043 unspecified, uses nullid as the only root. If 'heads' is
1034 1044 unspecified, uses list of all of the revlog's heads."""
1035 1045 nonodes = ([], [], [])
1036 1046 if roots is not None:
1037 1047 roots = list(roots)
1038 1048 if not roots:
1039 1049 return nonodes
1040 1050 lowestrev = min([self.rev(n) for n in roots])
1041 1051 else:
1042 1052 roots = [nullid] # Everybody's a descendant of nullid
1043 1053 lowestrev = nullrev
1044 1054 if (lowestrev == nullrev) and (heads is None):
1045 1055 # We want _all_ the nodes!
1046 1056 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1047 1057 if heads is None:
1048 1058 # All nodes are ancestors, so the latest ancestor is the last
1049 1059 # node.
1050 1060 highestrev = len(self) - 1
1051 1061 # Set ancestors to None to signal that every node is an ancestor.
1052 1062 ancestors = None
1053 1063 # Set heads to an empty dictionary for later discovery of heads
1054 1064 heads = {}
1055 1065 else:
1056 1066 heads = list(heads)
1057 1067 if not heads:
1058 1068 return nonodes
1059 1069 ancestors = set()
1060 1070 # Turn heads into a dictionary so we can remove 'fake' heads.
1061 1071 # Also, later we will be using it to filter out the heads we can't
1062 1072 # find from roots.
1063 1073 heads = dict.fromkeys(heads, False)
1064 1074 # Start at the top and keep marking parents until we're done.
1065 1075 nodestotag = set(heads)
1066 1076 # Remember where the top was so we can use it as a limit later.
1067 1077 highestrev = max([self.rev(n) for n in nodestotag])
1068 1078 while nodestotag:
1069 1079 # grab a node to tag
1070 1080 n = nodestotag.pop()
1071 1081 # Never tag nullid
1072 1082 if n == nullid:
1073 1083 continue
1074 1084 # A node's revision number represents its place in a
1075 1085 # topologically sorted list of nodes.
1076 1086 r = self.rev(n)
1077 1087 if r >= lowestrev:
1078 1088 if n not in ancestors:
1079 1089 # If we are possibly a descendant of one of the roots
1080 1090 # and we haven't already been marked as an ancestor
1081 1091 ancestors.add(n) # Mark as ancestor
1082 1092 # Add non-nullid parents to list of nodes to tag.
1083 1093 nodestotag.update(
1084 1094 [p for p in self.parents(n) if p != nullid]
1085 1095 )
1086 1096 elif n in heads: # We've seen it before, is it a fake head?
1087 1097 # So it is, real heads should not be the ancestors of
1088 1098 # any other heads.
1089 1099 heads.pop(n)
1090 1100 if not ancestors:
1091 1101 return nonodes
1092 1102 # Now that we have our set of ancestors, we want to remove any
1093 1103 # roots that are not ancestors.
1094 1104
1095 1105 # If one of the roots was nullid, everything is included anyway.
1096 1106 if lowestrev > nullrev:
1097 1107 # But, since we weren't, let's recompute the lowest rev to not
1098 1108 # include roots that aren't ancestors.
1099 1109
1100 1110 # Filter out roots that aren't ancestors of heads
1101 1111 roots = [root for root in roots if root in ancestors]
1102 1112 # Recompute the lowest revision
1103 1113 if roots:
1104 1114 lowestrev = min([self.rev(root) for root in roots])
1105 1115 else:
1106 1116 # No more roots? Return empty list
1107 1117 return nonodes
1108 1118 else:
1109 1119 # We are descending from nullid, and don't need to care about
1110 1120 # any other roots.
1111 1121 lowestrev = nullrev
1112 1122 roots = [nullid]
1113 1123 # Transform our roots list into a set.
1114 1124 descendants = set(roots)
1115 1125 # Also, keep the original roots so we can filter out roots that aren't
1116 1126 # 'real' roots (i.e. are descended from other roots).
1117 1127 roots = descendants.copy()
1118 1128 # Our topologically sorted list of output nodes.
1119 1129 orderedout = []
1120 1130 # Don't start at nullid since we don't want nullid in our output list,
1121 1131 # and if nullid shows up in descendants, empty parents will look like
1122 1132 # they're descendants.
1123 1133 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1124 1134 n = self.node(r)
1125 1135 isdescendant = False
1126 1136 if lowestrev == nullrev: # Everybody is a descendant of nullid
1127 1137 isdescendant = True
1128 1138 elif n in descendants:
1129 1139 # n is already a descendant
1130 1140 isdescendant = True
1131 1141 # This check only needs to be done here because all the roots
1132 1142 # will start being marked as descendants before the loop.
1133 1143 if n in roots:
1134 1144 # If n was a root, check if it's a 'real' root.
1135 1145 p = tuple(self.parents(n))
1136 1146 # If any of its parents are descendants, it's not a root.
1137 1147 if (p[0] in descendants) or (p[1] in descendants):
1138 1148 roots.remove(n)
1139 1149 else:
1140 1150 p = tuple(self.parents(n))
1141 1151 # A node is a descendant if either of its parents are
1142 1152 # descendants. (We seeded the descendants set with the roots
1143 1153 # up there, remember?)
1144 1154 if (p[0] in descendants) or (p[1] in descendants):
1145 1155 descendants.add(n)
1146 1156 isdescendant = True
1147 1157 if isdescendant and ((ancestors is None) or (n in ancestors)):
1148 1158 # Only include nodes that are both descendants and ancestors.
1149 1159 orderedout.append(n)
1150 1160 if (ancestors is not None) and (n in heads):
1151 1161 # We're trying to figure out which heads are reachable
1152 1162 # from roots.
1153 1163 # Mark this head as having been reached
1154 1164 heads[n] = True
1155 1165 elif ancestors is None:
1156 1166 # Otherwise, we're trying to discover the heads.
1157 1167 # Assume this is a head because if it isn't, the next step
1158 1168 # will eventually remove it.
1159 1169 heads[n] = True
1160 1170 # But, obviously its parents aren't.
1161 1171 for p in self.parents(n):
1162 1172 heads.pop(p, None)
1163 1173 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1164 1174 roots = list(roots)
1165 1175 assert orderedout
1166 1176 assert roots
1167 1177 assert heads
1168 1178 return (orderedout, roots, heads)
1169 1179
1170 1180 def headrevs(self, revs=None):
1171 1181 if revs is None:
1172 1182 try:
1173 1183 return self.index.headrevs()
1174 1184 except AttributeError:
1175 1185 return self._headrevs()
1176 1186 if rustdagop is not None:
1177 1187 return rustdagop.headrevs(self.index, revs)
1178 1188 return dagop.headrevs(revs, self._uncheckedparentrevs)
1179 1189
1180 1190 def computephases(self, roots):
1181 1191 return self.index.computephasesmapsets(roots)
1182 1192
1183 1193 def _headrevs(self):
1184 1194 count = len(self)
1185 1195 if not count:
1186 1196 return [nullrev]
1187 1197 # we won't iter over filtered rev so nobody is a head at start
1188 1198 ishead = [0] * (count + 1)
1189 1199 index = self.index
1190 1200 for r in self:
1191 1201 ishead[r] = 1 # I may be a head
1192 1202 e = index[r]
1193 1203 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1194 1204 return [r for r, val in enumerate(ishead) if val]
1195 1205
1196 1206 def heads(self, start=None, stop=None):
1197 1207 """return the list of all nodes that have no children
1198 1208
1199 1209 if start is specified, only heads that are descendants of
1200 1210 start will be returned
1201 1211 if stop is specified, it will consider all the revs from stop
1202 1212 as if they had no children
1203 1213 """
1204 1214 if start is None and stop is None:
1205 1215 if not len(self):
1206 1216 return [nullid]
1207 1217 return [self.node(r) for r in self.headrevs()]
1208 1218
1209 1219 if start is None:
1210 1220 start = nullrev
1211 1221 else:
1212 1222 start = self.rev(start)
1213 1223
1214 1224 stoprevs = set(self.rev(n) for n in stop or [])
1215 1225
1216 1226 revs = dagop.headrevssubset(
1217 1227 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1218 1228 )
1219 1229
1220 1230 return [self.node(rev) for rev in revs]
1221 1231
1222 1232 def children(self, node):
1223 1233 """find the children of a given node"""
1224 1234 c = []
1225 1235 p = self.rev(node)
1226 1236 for r in self.revs(start=p + 1):
1227 1237 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1228 1238 if prevs:
1229 1239 for pr in prevs:
1230 1240 if pr == p:
1231 1241 c.append(self.node(r))
1232 1242 elif p == nullrev:
1233 1243 c.append(self.node(r))
1234 1244 return c
1235 1245
1236 1246 def commonancestorsheads(self, a, b):
1237 1247 """calculate all the heads of the common ancestors of nodes a and b"""
1238 1248 a, b = self.rev(a), self.rev(b)
1239 1249 ancs = self._commonancestorsheads(a, b)
1240 1250 return pycompat.maplist(self.node, ancs)
1241 1251
1242 1252 def _commonancestorsheads(self, *revs):
1243 1253 """calculate all the heads of the common ancestors of revs"""
1244 1254 try:
1245 1255 ancs = self.index.commonancestorsheads(*revs)
1246 1256 except (AttributeError, OverflowError): # C implementation failed
1247 1257 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1248 1258 return ancs
1249 1259
1250 1260 def isancestor(self, a, b):
1251 1261 """return True if node a is an ancestor of node b
1252 1262
1253 1263 A revision is considered an ancestor of itself."""
1254 1264 a, b = self.rev(a), self.rev(b)
1255 1265 return self.isancestorrev(a, b)
1256 1266
1257 1267 def isancestorrev(self, a, b):
1258 1268 """return True if revision a is an ancestor of revision b
1259 1269
1260 1270 A revision is considered an ancestor of itself.
1261 1271
1262 1272 The implementation of this is trivial but the use of
1263 1273 reachableroots is not."""
1264 1274 if a == nullrev:
1265 1275 return True
1266 1276 elif a == b:
1267 1277 return True
1268 1278 elif a > b:
1269 1279 return False
1270 1280 return bool(self.reachableroots(a, [b], [a], includepath=False))
1271 1281
1272 1282 def reachableroots(self, minroot, heads, roots, includepath=False):
1273 1283 """return (heads(::<roots> and <roots>::<heads>))
1274 1284
1275 1285 If includepath is True, return (<roots>::<heads>)."""
1276 1286 try:
1277 1287 return self.index.reachableroots2(
1278 1288 minroot, heads, roots, includepath
1279 1289 )
1280 1290 except AttributeError:
1281 1291 return dagop._reachablerootspure(
1282 1292 self.parentrevs, minroot, roots, heads, includepath
1283 1293 )
1284 1294
1285 1295 def ancestor(self, a, b):
1286 1296 """calculate the "best" common ancestor of nodes a and b"""
1287 1297
1288 1298 a, b = self.rev(a), self.rev(b)
1289 1299 try:
1290 1300 ancs = self.index.ancestors(a, b)
1291 1301 except (AttributeError, OverflowError):
1292 1302 ancs = ancestor.ancestors(self.parentrevs, a, b)
1293 1303 if ancs:
1294 1304 # choose a consistent winner when there's a tie
1295 1305 return min(map(self.node, ancs))
1296 1306 return nullid
1297 1307
1298 1308 def _match(self, id):
1299 1309 if isinstance(id, int):
1300 1310 # rev
1301 1311 return self.node(id)
1302 1312 if len(id) == 20:
1303 1313 # possibly a binary node
1304 1314 # odds of a binary node being all hex in ASCII are 1 in 10**25
1305 1315 try:
1306 1316 node = id
1307 1317 self.rev(node) # quick search the index
1308 1318 return node
1309 1319 except error.LookupError:
1310 1320 pass # may be partial hex id
1311 1321 try:
1312 1322 # str(rev)
1313 1323 rev = int(id)
1314 1324 if b"%d" % rev != id:
1315 1325 raise ValueError
1316 1326 if rev < 0:
1317 1327 rev = len(self) + rev
1318 1328 if rev < 0 or rev >= len(self):
1319 1329 raise ValueError
1320 1330 return self.node(rev)
1321 1331 except (ValueError, OverflowError):
1322 1332 pass
1323 1333 if len(id) == 40:
1324 1334 try:
1325 1335 # a full hex nodeid?
1326 1336 node = bin(id)
1327 1337 self.rev(node)
1328 1338 return node
1329 1339 except (TypeError, error.LookupError):
1330 1340 pass
1331 1341
1332 1342 def _partialmatch(self, id):
1333 1343 # we don't care about wdirfilenodeids as they should always be full hashes
1334 1344 maybewdir = wdirhex.startswith(id)
1335 1345 try:
1336 1346 partial = self.index.partialmatch(id)
1337 1347 if partial and self.hasnode(partial):
1338 1348 if maybewdir:
1339 1349 # single 'ff...' match in radix tree, ambiguous with wdir
1340 1350 raise error.RevlogError
1341 1351 return partial
1342 1352 if maybewdir:
1343 1353 # no 'ff...' match in radix tree, wdir identified
1344 1354 raise error.WdirUnsupported
1345 1355 return None
1346 1356 except error.RevlogError:
1347 1357 # parsers.c radix tree lookup gave multiple matches
1348 1358 # fast path: for unfiltered changelog, radix tree is accurate
1349 1359 if not getattr(self, 'filteredrevs', None):
1350 1360 raise error.AmbiguousPrefixLookupError(
1351 1361 id, self.indexfile, _(b'ambiguous identifier')
1352 1362 )
1353 1363 # fall through to slow path that filters hidden revisions
1354 1364 except (AttributeError, ValueError):
1355 1365 # we are pure python, or key was too short to search radix tree
1356 1366 pass
1357 1367
1358 1368 if id in self._pcache:
1359 1369 return self._pcache[id]
1360 1370
1361 1371 if len(id) <= 40:
1362 1372 try:
1363 1373 # hex(node)[:...]
1364 1374 l = len(id) // 2 # grab an even number of digits
1365 1375 prefix = bin(id[: l * 2])
1366 1376 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1367 1377 nl = [
1368 1378 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1369 1379 ]
1370 1380 if nullhex.startswith(id):
1371 1381 nl.append(nullid)
1372 1382 if len(nl) > 0:
1373 1383 if len(nl) == 1 and not maybewdir:
1374 1384 self._pcache[id] = nl[0]
1375 1385 return nl[0]
1376 1386 raise error.AmbiguousPrefixLookupError(
1377 1387 id, self.indexfile, _(b'ambiguous identifier')
1378 1388 )
1379 1389 if maybewdir:
1380 1390 raise error.WdirUnsupported
1381 1391 return None
1382 1392 except TypeError:
1383 1393 pass
1384 1394
1385 1395 def lookup(self, id):
1386 1396 """locate a node based on:
1387 1397 - revision number or str(revision number)
1388 1398 - nodeid or subset of hex nodeid
1389 1399 """
1390 1400 n = self._match(id)
1391 1401 if n is not None:
1392 1402 return n
1393 1403 n = self._partialmatch(id)
1394 1404 if n:
1395 1405 return n
1396 1406
1397 1407 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1398 1408
1399 1409 def shortest(self, node, minlength=1):
1400 1410 """Find the shortest unambiguous prefix that matches node."""
1401 1411
1402 1412 def isvalid(prefix):
1403 1413 try:
1404 1414 matchednode = self._partialmatch(prefix)
1405 1415 except error.AmbiguousPrefixLookupError:
1406 1416 return False
1407 1417 except error.WdirUnsupported:
1408 1418 # single 'ff...' match
1409 1419 return True
1410 1420 if matchednode is None:
1411 1421 raise error.LookupError(node, self.indexfile, _(b'no node'))
1412 1422 return True
1413 1423
1414 1424 def maybewdir(prefix):
1415 1425 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1416 1426
1417 1427 hexnode = hex(node)
1418 1428
1419 1429 def disambiguate(hexnode, minlength):
1420 1430 """Disambiguate against wdirid."""
1421 1431 for length in range(minlength, 41):
1422 1432 prefix = hexnode[:length]
1423 1433 if not maybewdir(prefix):
1424 1434 return prefix
1425 1435
1426 1436 if not getattr(self, 'filteredrevs', None):
1427 1437 try:
1428 1438 length = max(self.index.shortest(node), minlength)
1429 1439 return disambiguate(hexnode, length)
1430 1440 except error.RevlogError:
1431 1441 if node != wdirid:
1432 1442 raise error.LookupError(node, self.indexfile, _(b'no node'))
1433 1443 except AttributeError:
1434 1444 # Fall through to pure code
1435 1445 pass
1436 1446
1437 1447 if node == wdirid:
1438 1448 for length in range(minlength, 41):
1439 1449 prefix = hexnode[:length]
1440 1450 if isvalid(prefix):
1441 1451 return prefix
1442 1452
1443 1453 for length in range(minlength, 41):
1444 1454 prefix = hexnode[:length]
1445 1455 if isvalid(prefix):
1446 1456 return disambiguate(hexnode, length)
1447 1457
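# Illustrative example of the behavior above: if no other node shares the
# b'1f' prefix with our node, shortest(node) returns b'1f'; an all-'f'
# prefix is never returned directly because it would be ambiguous with the
# virtual working-directory id (wdirid), so disambiguate() extends it.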
1448 1458 def cmp(self, node, text):
1449 1459 """compare text with a given file revision
1450 1460
1451 1461 returns True if text is different than what is stored.
1452 1462 """
1453 1463 p1, p2 = self.parents(node)
1454 1464 return storageutil.hashrevisionsha1(text, p1, p2) != node
1455 1465
1456 1466 def _cachesegment(self, offset, data):
1457 1467 """Add a segment to the revlog cache.
1458 1468
1459 1469 Accepts an absolute offset and the data that is at that location.
1460 1470 """
1461 1471 o, d = self._chunkcache
1462 1472 # try to add to existing cache
1463 1473 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1464 1474 self._chunkcache = o, d + data
1465 1475 else:
1466 1476 self._chunkcache = offset, data
1467 1477
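# Worked example (illustrative): with _chunkcache == (100, d) and
# len(d) == 50, _cachesegment(150, data) extends the cache to (100, d + data)
# because the new segment is contiguous with the cached one (provided the
# combined size stays under _chunksize); any other offset simply replaces
# the cache with (offset, data).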
1468 1478 def _readsegment(self, offset, length, df=None):
1469 1479 """Load a segment of raw data from the revlog.
1470 1480
1471 1481 Accepts an absolute offset, length to read, and an optional existing
1472 1482 file handle to read from.
1473 1483
1474 1484 If an existing file handle is passed, it will be seeked and the
1475 1485 original seek position will NOT be restored.
1476 1486
1477 1487 Returns a str or buffer of raw byte data.
1478 1488
1479 1489 Raises if the requested number of bytes could not be read.
1480 1490 """
1481 1491 # Cache data both forward and backward around the requested
1482 1492 # data, in a fixed size window. This helps speed up operations
1483 1493 # involving reading the revlog backwards.
1484 1494 cachesize = self._chunkcachesize
1485 1495 realoffset = offset & ~(cachesize - 1)
1486 1496 reallength = (
1487 1497 (offset + length + cachesize) & ~(cachesize - 1)
1488 1498 ) - realoffset
1489 1499 with self._datareadfp(df) as df:
1490 1500 df.seek(realoffset)
1491 1501 d = df.read(reallength)
1492 1502
1493 1503 self._cachesegment(realoffset, d)
1494 1504 if offset != realoffset or reallength != length:
1495 1505 startoffset = offset - realoffset
1496 1506 if len(d) - startoffset < length:
1497 1507 raise error.RevlogError(
1498 1508 _(
1499 1509 b'partial read of revlog %s; expected %d bytes from '
1500 1510 b'offset %d, got %d'
1501 1511 )
1502 1512 % (
1503 1513 self.indexfile if self._inline else self.datafile,
1504 1514 length,
1505 1515 realoffset,
1506 1516 len(d) - startoffset,
1507 1517 )
1508 1518 )
1509 1519
1510 1520 return util.buffer(d, startoffset, length)
1511 1521
1512 1522 if len(d) < length:
1513 1523 raise error.RevlogError(
1514 1524 _(
1515 1525 b'partial read of revlog %s; expected %d bytes from offset '
1516 1526 b'%d, got %d'
1517 1527 )
1518 1528 % (
1519 1529 self.indexfile if self._inline else self.datafile,
1520 1530 length,
1521 1531 offset,
1522 1532 len(d),
1523 1533 )
1524 1534 )
1525 1535
1526 1536 return d
1527 1537
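# Worked example of the window alignment above (illustrative), assuming a
# chunk cache size of 65536 bytes:
#   offset, length = 70000, 100
#   realoffset = 70000 & ~(65536 - 1)                            -> 65536
#   reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - 65536  -> 65536
# so bytes [65536, 131072) are read and cached, and the requested
# [70000, 70100) slice is returned as a buffer into that window.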
1528 1538 def _getsegment(self, offset, length, df=None):
1529 1539 """Obtain a segment of raw data from the revlog.
1530 1540
1531 1541 Accepts an absolute offset, length of bytes to obtain, and an
1532 1542 optional file handle to the already-opened revlog. If the file
1533 1543 handle is used, its original seek position will not be preserved.
1534 1544
1535 1545 Requests for data may be returned from a cache.
1536 1546
1537 1547 Returns a str or a buffer instance of raw byte data.
1538 1548 """
1539 1549 o, d = self._chunkcache
1540 1550 l = len(d)
1541 1551
1542 1552 # is it in the cache?
1543 1553 cachestart = offset - o
1544 1554 cacheend = cachestart + length
1545 1555 if cachestart >= 0 and cacheend <= l:
1546 1556 if cachestart == 0 and cacheend == l:
1547 1557 return d # avoid a copy
1548 1558 return util.buffer(d, cachestart, cacheend - cachestart)
1549 1559
1550 1560 return self._readsegment(offset, length, df=df)
1551 1561
1552 1562 def _getsegmentforrevs(self, startrev, endrev, df=None):
1553 1563 """Obtain a segment of raw data corresponding to a range of revisions.
1554 1564
1555 1565 Accepts the start and end revisions and an optional already-open
1556 1566 file handle to be used for reading. If the file handle is used, its
1557 1567 seek position will not be preserved.
1558 1568
1559 1569 Requests for data may be satisfied by a cache.
1560 1570
1561 1571 Returns a 2-tuple of (offset, data) for the requested range of
1562 1572 revisions. Offset is the integer offset from the beginning of the
1563 1573 revlog and data is a str or buffer of the raw byte data.
1564 1574
1565 1575 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1566 1576 to determine where each revision's data begins and ends.
1567 1577 """
1568 1578 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1569 1579 # (functions are expensive).
1570 1580 index = self.index
1571 1581 istart = index[startrev]
1572 1582 start = int(istart[0] >> 16)
1573 1583 if startrev == endrev:
1574 1584 end = start + istart[1]
1575 1585 else:
1576 1586 iend = index[endrev]
1577 1587 end = int(iend[0] >> 16) + iend[1]
1578 1588
1579 1589 if self._inline:
1580 1590 start += (startrev + 1) * self._io.size
1581 1591 end += (endrev + 1) * self._io.size
1582 1592 length = end - start
1583 1593
1584 1594 return start, self._getsegment(start, length, df=df)
1585 1595
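# Illustrative note: in an inline revlog, index entries and data chunks are
# interleaved in a single file, which is why revision r's data starts at
# start(r) + (r + 1) * self._io.size -- e.g. with a 64-byte index entry,
# revision 0's data begins at byte 64, immediately after its index entry.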
1586 1596 def _chunk(self, rev, df=None):
1587 1597 """Obtain a single decompressed chunk for a revision.
1588 1598
1589 1599 Accepts an integer revision and an optional already-open file handle
1590 1600 to be used for reading. If used, the seek position of the file will not
1591 1601 be preserved.
1592 1602
1593 1603 Returns a str holding uncompressed data for the requested revision.
1594 1604 """
1595 1605 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1596 1606
1597 1607 def _chunks(self, revs, df=None, targetsize=None):
1598 1608 """Obtain decompressed chunks for the specified revisions.
1599 1609
1600 1610 Accepts an iterable of numeric revisions that are assumed to be in
1601 1611 ascending order. Also accepts an optional already-open file handle
1602 1612 to be used for reading. If used, the seek position of the file will
1603 1613 not be preserved.
1604 1614
1605 1615 This function is similar to calling ``self._chunk()`` multiple times,
1606 1616 but is faster.
1607 1617
1608 1618 Returns a list with decompressed data for each requested revision.
1609 1619 """
1610 1620 if not revs:
1611 1621 return []
1612 1622 start = self.start
1613 1623 length = self.length
1614 1624 inline = self._inline
1615 1625 iosize = self._io.size
1616 1626 buffer = util.buffer
1617 1627
1618 1628 l = []
1619 1629 ladd = l.append
1620 1630
1621 1631 if not self._withsparseread:
1622 1632 slicedchunks = (revs,)
1623 1633 else:
1624 1634 slicedchunks = deltautil.slicechunk(
1625 1635 self, revs, targetsize=targetsize
1626 1636 )
1627 1637
1628 1638 for revschunk in slicedchunks:
1629 1639 firstrev = revschunk[0]
1630 1640 # Skip trailing revisions with empty diff
1631 1641 for lastrev in revschunk[::-1]:
1632 1642 if length(lastrev) != 0:
1633 1643 break
1634 1644
1635 1645 try:
1636 1646 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1637 1647 except OverflowError:
1638 1648 # issue4215 - we can't cache a run of chunks greater than
1639 1649 # 2G on Windows
1640 1650 return [self._chunk(rev, df=df) for rev in revschunk]
1641 1651
1642 1652 decomp = self.decompress
1643 1653 for rev in revschunk:
1644 1654 chunkstart = start(rev)
1645 1655 if inline:
1646 1656 chunkstart += (rev + 1) * iosize
1647 1657 chunklength = length(rev)
1648 1658 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1649 1659
1650 1660 return l
1651 1661
1652 1662 def _chunkclear(self):
1653 1663 """Clear the raw chunk cache."""
1654 1664 self._chunkcache = (0, b'')
1655 1665
1656 1666 def deltaparent(self, rev):
1657 1667 """return deltaparent of the given revision"""
1658 1668 base = self.index[rev][3]
1659 1669 if base == rev:
1660 1670 return nullrev
1661 1671 elif self._generaldelta:
1662 1672 return base
1663 1673 else:
1664 1674 return rev - 1
1665 1675
1666 1676 def issnapshot(self, rev):
1667 1677 """tells whether rev is a snapshot
1668 1678 """
1669 1679 if not self._sparserevlog:
1670 1680 return self.deltaparent(rev) == nullrev
1671 1681 elif util.safehasattr(self.index, 'issnapshot'):
1672 1682 # directly assign the method to cache the testing and access
1673 1683 self.issnapshot = self.index.issnapshot
1674 1684 return self.issnapshot(rev)
1675 1685 if rev == nullrev:
1676 1686 return True
1677 1687 entry = self.index[rev]
1678 1688 base = entry[3]
1679 1689 if base == rev:
1680 1690 return True
1681 1691 if base == nullrev:
1682 1692 return True
1683 1693 p1 = entry[5]
1684 1694 p2 = entry[6]
1685 1695 if base == p1 or base == p2:
1686 1696 return False
1687 1697 return self.issnapshot(base)
1688 1698
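# Illustrative summary of the recursion above: a revision whose delta base
# is one of its parents is a plain delta, not a snapshot; a revision based
# on nullrev (or on itself) is a full snapshot; a revision based on any
# other revision is an intermediate snapshot only if that base is itself
# (transitively) a snapshot.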
1689 1699 def snapshotdepth(self, rev):
1690 1700 """number of snapshot in the chain before this one"""
1691 1701 if not self.issnapshot(rev):
1692 1702 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1693 1703 return len(self._deltachain(rev)[0]) - 1
1694 1704
1695 1705 def revdiff(self, rev1, rev2):
1696 1706 """return or calculate a delta between two revisions
1697 1707
1698 1708 The delta calculated is in binary form and is intended to be written to
1699 1709 revlog data directly. So this function needs raw revision data.
1700 1710 """
1701 1711 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1702 1712 return bytes(self._chunk(rev2))
1703 1713
1704 1714 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1705 1715
1706 1716 def _processflags(self, text, flags, operation, raw=False):
1707 1717 """deprecated entry point to access flag processors"""
1708 1718 msg = b'_processflag(...) use the specialized variant'
1709 1719 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1710 1720 if raw:
1711 1721 return text, flagutil.processflagsraw(self, text, flags)
1712 1722 elif operation == b'read':
1713 1723 return flagutil.processflagsread(self, text, flags)
1714 1724 else: # write operation
1715 1725 return flagutil.processflagswrite(self, text, flags)
1716 1726
1717 1727 def revision(self, nodeorrev, _df=None, raw=False):
1718 1728 """return an uncompressed revision of a given node or revision
1719 1729 number.
1720 1730
1721 1731 _df - an existing file handle to read from. (internal-only)
1722 1732 raw - an optional argument specifying if the revision data is to be
1723 1733 treated as raw data when applying flag transforms. 'raw' should be set
1724 1734 to True when generating changegroups or in debug commands.
1725 1735 """
1726 1736 if raw:
1727 1737 msg = (
1728 1738 b'revlog.revision(..., raw=True) is deprecated, '
1729 1739 b'use revlog.rawdata(...)'
1730 1740 )
1731 1741 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1732 1742 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1733 1743
1734 1744 def sidedata(self, nodeorrev, _df=None):
1735 1745 """a map of extra data related to the changeset but not part of the hash
1736 1746
1737 1747 This function currently returns a dictionary. However, a more advanced
1738 1748 mapping object will likely be used in the future for more
1739 1749 efficient/lazy code.
1740 1750 """
1741 1751 return self._revisiondata(nodeorrev, _df)[1]
1742 1752
1743 1753 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1744 1754 # deal with <nodeorrev> argument type
1745 1755 if isinstance(nodeorrev, int):
1746 1756 rev = nodeorrev
1747 1757 node = self.node(rev)
1748 1758 else:
1749 1759 node = nodeorrev
1750 1760 rev = None
1751 1761
1752 1762 # fast path the special `nullid` rev
1753 1763 if node == nullid:
1754 1764 return b"", {}
1755 1765
1756 1766 # The text as stored inside the revlog. Might be the revision or might
1757 1767 # need to be processed to retrieve the revision.
1758 1768 rawtext = None
1759 1769
1760 1770 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1761 1771
1762 1772 if raw and validated:
1763 1773 # if we don't want to process the raw text and the raw
1764 1774 # text is cached, we can exit early.
1765 1775 return rawtext, {}
1766 1776 if rev is None:
1767 1777 rev = self.rev(node)
1768 1778 # the revlog's flags for this revision
1769 1779 # (they usually alter its state or content)
1770 1780 flags = self.flags(rev)
1771 1781
1772 1782 if validated and flags == REVIDX_DEFAULT_FLAGS:
1773 1783 # no extra flags set, no flag processor runs, text = rawtext
1774 1784 return rawtext, {}
1775 1785
1776 1786 sidedata = {}
1777 1787 if raw:
1778 1788 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1779 1789 text = rawtext
1780 1790 else:
1781 1791 try:
1782 1792 r = flagutil.processflagsread(self, rawtext, flags)
1783 1793 except error.SidedataHashError as exc:
1784 1794 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1785 1795 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1786 1796 raise error.RevlogError(msg)
1787 1797 text, validatehash, sidedata = r
1788 1798 if validatehash:
1789 1799 self.checkhash(text, node, rev=rev)
1790 1800 if not validated:
1791 1801 self._revisioncache = (node, rev, rawtext)
1792 1802
1793 1803 return text, sidedata
1794 1804
1795 1805 def _rawtext(self, node, rev, _df=None):
1796 1806 """return the possibly unvalidated rawtext for a revision
1797 1807
1798 1808 returns (rev, rawtext, validated)
1799 1809 """
1800 1810
1801 1811 # revision in the cache (could be useful to apply delta)
1802 1812 cachedrev = None
1803 1813 # An intermediate text to apply deltas to
1804 1814 basetext = None
1805 1815
1806 1816 # Check if we have the entry in cache
1807 1817 # The cache entry looks like (node, rev, rawtext)
1808 1818 if self._revisioncache:
1809 1819 if self._revisioncache[0] == node:
1810 1820 return (rev, self._revisioncache[2], True)
1811 1821 cachedrev = self._revisioncache[1]
1812 1822
1813 1823 if rev is None:
1814 1824 rev = self.rev(node)
1815 1825
1816 1826 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1817 1827 if stopped:
1818 1828 basetext = self._revisioncache[2]
1819 1829
1820 1830 # drop cache to save memory, the caller is expected to
1821 1831 # update self._revisioncache after validating the text
1822 1832 self._revisioncache = None
1823 1833
1824 1834 targetsize = None
1825 1835 rawsize = self.index[rev][2]
1826 1836 if 0 <= rawsize:
1827 1837 targetsize = 4 * rawsize
1828 1838
1829 1839 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1830 1840 if basetext is None:
1831 1841 basetext = bytes(bins[0])
1832 1842 bins = bins[1:]
1833 1843
1834 1844 rawtext = mdiff.patches(basetext, bins)
1835 1845 del basetext # let us have a chance to free memory early
1836 1846 return (rev, rawtext, False)
1837 1847
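# Sketch of the reconstruction above (illustrative): for a delta chain
# [base, d1, d2] returned by _deltachain(), the raw text is rebuilt as
#   rawtext = mdiff.patches(bytes(chunks[0]), [chunks[1], chunks[2]])
# unless the chain walk stopped at a cached revision, in which case the
# cached rawtext serves as the base text instead.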
1838 1848 def rawdata(self, nodeorrev, _df=None):
1839 1849 """return an uncompressed raw data of a given node or revision number.
1840 1850
1841 1851 _df - an existing file handle to read from. (internal-only)
1842 1852 """
1843 1853 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1844 1854
1845 1855 def hash(self, text, p1, p2):
1846 1856 """Compute a node hash.
1847 1857
1848 1858 Available as a function so that subclasses can replace the hash
1849 1859 as needed.
1850 1860 """
1851 1861 return storageutil.hashrevisionsha1(text, p1, p2)
1852 1862
1853 1863 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1854 1864 """Check node hash integrity.
1855 1865
1856 1866 Available as a function so that subclasses can extend hash mismatch
1857 1867 behaviors as needed.
1858 1868 """
1859 1869 try:
1860 1870 if p1 is None and p2 is None:
1861 1871 p1, p2 = self.parents(node)
1862 1872 if node != self.hash(text, p1, p2):
1863 1873 # Clear the revision cache on hash failure. The revision cache
1864 1874 # only stores the raw revision and clearing the cache does have
1865 1875 # the side-effect that we won't have a cache hit when the raw
1866 1876 # revision data is accessed. But this case should be rare and
1867 1877 # it is extra work to teach the cache about the hash
1868 1878 # verification state.
1869 1879 if self._revisioncache and self._revisioncache[0] == node:
1870 1880 self._revisioncache = None
1871 1881
1872 1882 revornode = rev
1873 1883 if revornode is None:
1874 1884 revornode = templatefilters.short(hex(node))
1875 1885 raise error.RevlogError(
1876 1886 _(b"integrity check failed on %s:%s")
1877 1887 % (self.indexfile, pycompat.bytestr(revornode))
1878 1888 )
1879 1889 except error.RevlogError:
1880 1890 if self._censorable and storageutil.iscensoredtext(text):
1881 1891 raise error.CensoredNodeError(self.indexfile, node, text)
1882 1892 raise
1883 1893
1884 1894 def _enforceinlinesize(self, tr, fp=None):
1885 1895 """Check if the revlog is too big for inline and convert if so.
1886 1896
1887 1897 This should be called after revisions are added to the revlog. If the
1888 1898 revlog has grown too large to be an inline revlog, it will convert it
1889 1899 to use multiple index and data files.
1890 1900 """
1891 1901 tiprev = len(self) - 1
1892 1902 if (
1893 1903 not self._inline
1894 1904 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1895 1905 ):
1896 1906 return
1897 1907
1898 1908 trinfo = tr.find(self.indexfile)
1899 1909 if trinfo is None:
1900 1910 raise error.RevlogError(
1901 1911 _(b"%s not found in the transaction") % self.indexfile
1902 1912 )
1903 1913
1904 1914 trindex = trinfo[2]
1905 1915 if trindex is not None:
1906 1916 dataoff = self.start(trindex)
1907 1917 else:
1908 1918 # revlog was stripped at start of transaction, use all leftover data
1909 1919 trindex = len(self) - 1
1910 1920 dataoff = self.end(tiprev)
1911 1921
1912 1922 tr.add(self.datafile, dataoff)
1913 1923
1914 1924 if fp:
1915 1925 fp.flush()
1916 1926 fp.close()
1917 1927 # We can't use the cached file handle after close(). So prevent
1918 1928 # its usage.
1919 1929 self._writinghandles = None
1920 1930
1921 1931 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1922 1932 for r in self:
1923 1933 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1924 1934
1925 1935 with self._indexfp(b'w') as fp:
1926 1936 self.version &= ~FLAG_INLINE_DATA
1927 1937 self._inline = False
1928 1938 io = self._io
1929 1939 for i in self:
1930 1940 e = io.packentry(self.index[i], self.node, self.version, i)
1931 1941 fp.write(e)
1932 1942
1933 1943 # the temp file replaces the real index when we exit the context
1934 1944 # manager
1935 1945
1936 1946 tr.replace(self.indexfile, trindex * self._io.size)
1937 1947 self._chunkclear()
1938 1948
1939 1949 def _nodeduplicatecallback(self, transaction, node):
1940 1950 """called when trying to add a node already stored.
1941 1951 """
1942 1952
1943 1953 def addrevision(
1944 1954 self,
1945 1955 text,
1946 1956 transaction,
1947 1957 link,
1948 1958 p1,
1949 1959 p2,
1950 1960 cachedelta=None,
1951 1961 node=None,
1952 1962 flags=REVIDX_DEFAULT_FLAGS,
1953 1963 deltacomputer=None,
1954 1964 sidedata=None,
1955 1965 ):
1956 1966 """add a revision to the log
1957 1967
1958 1968 text - the revision data to add
1959 1969 transaction - the transaction object used for rollback
1960 1970 link - the linkrev data to add
1961 1971 p1, p2 - the parent nodeids of the revision
1962 1972 cachedelta - an optional precomputed delta
1963 1973 node - nodeid of revision; typically node is not specified, and it is
1964 1974 computed by default as hash(text, p1, p2), however subclasses might
1965 1975 use a different hashing method (and override checkhash() in that case)
1966 1976 flags - the known flags to set on the revision
1967 1977 deltacomputer - an optional deltacomputer instance shared between
1968 1978 multiple calls
1969 1979 """
1970 1980 if link == nullrev:
1971 1981 raise error.RevlogError(
1972 1982 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1973 1983 )
1974 1984
1975 1985 if sidedata is None:
1976 1986 sidedata = {}
1977 1987 flags = flags & ~REVIDX_SIDEDATA
1978 1988 elif not self.hassidedata:
1979 1989 raise error.ProgrammingError(
1980 1990 _(b"trying to add sidedata to a revlog that doesn't support them")
1981 1991 )
1982 1992 else:
1983 1993 flags |= REVIDX_SIDEDATA
1984 1994
1985 1995 if flags:
1986 1996 node = node or self.hash(text, p1, p2)
1987 1997
1988 1998 rawtext, validatehash = flagutil.processflagswrite(
1989 1999 self, text, flags, sidedata=sidedata
1990 2000 )
1991 2001
1992 2002 # If the flag processor modifies the revision data, ignore any provided
1993 2003 # cachedelta.
1994 2004 if rawtext != text:
1995 2005 cachedelta = None
1996 2006
1997 2007 if len(rawtext) > _maxentrysize:
1998 2008 raise error.RevlogError(
1999 2009 _(
2000 2010 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2001 2011 )
2002 2012 % (self.indexfile, len(rawtext))
2003 2013 )
2004 2014
2005 2015 node = node or self.hash(rawtext, p1, p2)
2006 2016 if self.index.has_node(node):
2007 2017 return node
2008 2018
2009 2019 if validatehash:
2010 2020 self.checkhash(rawtext, node, p1=p1, p2=p2)
2011 2021
2012 2022 return self.addrawrevision(
2013 2023 rawtext,
2014 2024 transaction,
2015 2025 link,
2016 2026 p1,
2017 2027 p2,
2018 2028 node,
2019 2029 flags,
2020 2030 cachedelta=cachedelta,
2021 2031 deltacomputer=deltacomputer,
2022 2032 )
2023 2033
2024 2034 def addrawrevision(
2025 2035 self,
2026 2036 rawtext,
2027 2037 transaction,
2028 2038 link,
2029 2039 p1,
2030 2040 p2,
2031 2041 node,
2032 2042 flags,
2033 2043 cachedelta=None,
2034 2044 deltacomputer=None,
2035 2045 ):
2036 2046 """add a raw revision with known flags, node and parents
2037 2047 useful when reusing a revision not stored in this revlog (e.g. received
2038 2048 over the wire, or read from an external bundle).
2039 2049 """
2040 2050 dfh = None
2041 2051 if not self._inline:
2042 2052 dfh = self._datafp(b"a+")
2043 2053 ifh = self._indexfp(b"a+")
2044 2054 try:
2045 2055 return self._addrevision(
2046 2056 node,
2047 2057 rawtext,
2048 2058 transaction,
2049 2059 link,
2050 2060 p1,
2051 2061 p2,
2052 2062 flags,
2053 2063 cachedelta,
2054 2064 ifh,
2055 2065 dfh,
2056 2066 deltacomputer=deltacomputer,
2057 2067 )
2058 2068 finally:
2059 2069 if dfh:
2060 2070 dfh.close()
2061 2071 ifh.close()
2062 2072
2063 2073 def compress(self, data):
2064 2074 """Generate a possibly-compressed representation of data."""
2065 2075 if not data:
2066 2076 return b'', data
2067 2077
2068 2078 compressed = self._compressor.compress(data)
2069 2079
2070 2080 if compressed:
2071 2081 # The revlog compressor added the header in the returned data.
2072 2082 return b'', compressed
2073 2083
2074 2084 if data[0:1] == b'\0':
2075 2085 return b'', data
2076 2086 return b'u', data
2077 2087
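# Illustrative summary of the (header, data) convention above, assuming the
# default zlib engine:
#   (b'', b'x\x9c...')  - compressed; the engine embeds its own header
#   (b'u', b'hello')    - stored uncompressed, marked with a 'u' byte
#   (b'', b'\0...')     - NUL-led data stored verbatim; decompress() already
#                         treats a leading NUL as raw data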
2078 2088 def decompress(self, data):
2079 2089 """Decompress a revlog chunk.
2080 2090
2081 2091 The chunk is expected to begin with a header identifying the
2082 2092 format type so it can be routed to an appropriate decompressor.
2083 2093 """
2084 2094 if not data:
2085 2095 return data
2086 2096
2087 2097 # Revlogs are read much more frequently than they are written and many
2088 2098 # chunks only take microseconds to decompress, so performance is
2089 2099 # important here.
2090 2100 #
2091 2101 # We can make a few assumptions about revlogs:
2092 2102 #
2093 2103 # 1) the majority of chunks will be compressed (as opposed to inline
2094 2104 # raw data).
2095 2105 # 2) decompressing *any* data will likely be at least 10x slower than
2096 2106 # returning raw inline data.
2097 2107 # 3) we want to prioritize common and officially supported compression
2098 2108 # engines
2099 2109 #
2100 2110 # It follows that we want to optimize for the "decompress compressed data
2101 2111 # when encoded with common and officially supported compression engines"
2102 2112 # case over "raw data" and "data encoded by less common or non-official
2103 2113 # compression engines." That is why we have the inline lookup first
2104 2114 # followed by the compengines lookup.
2105 2115 #
2106 2116 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2107 2117 # compressed chunks. And this matters for changelog and manifest reads.
2108 2118 t = data[0:1]
2109 2119
2110 2120 if t == b'x':
2111 2121 try:
2112 2122 return _zlibdecompress(data)
2113 2123 except zlib.error as e:
2114 2124 raise error.RevlogError(
2115 2125 _(b'revlog decompress error: %s')
2116 2126 % stringutil.forcebytestr(e)
2117 2127 )
2118 2128 # '\0' is more common than 'u' so it goes first.
2119 2129 elif t == b'\0':
2120 2130 return data
2121 2131 elif t == b'u':
2122 2132 return util.buffer(data, 1)
2123 2133
2124 2134 try:
2125 2135 compressor = self._decompressors[t]
2126 2136 except KeyError:
2127 2137 try:
2128 2138 engine = util.compengines.forrevlogheader(t)
2129 2139 compressor = engine.revlogcompressor(self._compengineopts)
2130 2140 self._decompressors[t] = compressor
2131 2141 except KeyError:
2132 2142 raise error.RevlogError(_(b'unknown compression type %r') % t)
2133 2143
2134 2144 return compressor.decompress(data)
2135 2145
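# Illustrative dispatch table for the header byte handled above:
#   b'x'  -> zlib-compressed chunk (fast-pathed through _zlibdecompress)
#   b'\0' -> raw data stored verbatim
#   b'u'  -> raw data; the 'u' marker is stripped via util.buffer(data, 1)
#   other -> resolved through util.compengines.forrevlogheader(t) and the
#            resulting compressor is cached in self._decompressors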
2136 2146 def _addrevision(
2137 2147 self,
2138 2148 node,
2139 2149 rawtext,
2140 2150 transaction,
2141 2151 link,
2142 2152 p1,
2143 2153 p2,
2144 2154 flags,
2145 2155 cachedelta,
2146 2156 ifh,
2147 2157 dfh,
2148 2158 alwayscache=False,
2149 2159 deltacomputer=None,
2150 2160 ):
2151 2161 """internal function to add revisions to the log
2152 2162
2153 2163 see addrevision for argument descriptions.
2154 2164
2155 2165 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2156 2166
2157 2167 if "deltacomputer" is not provided or None, a default deltacomputer will
2158 2168 be used.
2159 2169
2160 2170 invariants:
2161 2171 - rawtext is optional (can be None); if not set, cachedelta must be set.
2162 2172 If both are set, they must correspond to each other.
2163 2173 """
2164 2174 if node == nullid:
2165 2175 raise error.RevlogError(
2166 2176 _(b"%s: attempt to add null revision") % self.indexfile
2167 2177 )
2168 2178 if node == wdirid or node in wdirfilenodeids:
2169 2179 raise error.RevlogError(
2170 2180 _(b"%s: attempt to add wdir revision") % self.indexfile
2171 2181 )
2172 2182
2173 2183 if self._inline:
2174 2184 fh = ifh
2175 2185 else:
2176 2186 fh = dfh
2177 2187
2178 2188 btext = [rawtext]
2179 2189
2180 2190 curr = len(self)
2181 2191 prev = curr - 1
2182 2192 offset = self.end(prev)
2183 2193 p1r, p2r = self.rev(p1), self.rev(p2)
2184 2194
2185 2195 # full versions are inserted when the needed deltas
2186 2196 # become comparable to the uncompressed text
2187 2197 if rawtext is None:
2188 2198 # we need the rawtext size before it is changed by flag processors,
2189 2199 # which is the non-raw size. use revlog explicitly to avoid filelog's
2190 2200 # extra logic that might remove metadata size.
2191 2201 textlen = mdiff.patchedsize(
2192 2202 revlog.size(self, cachedelta[0]), cachedelta[1]
2193 2203 )
2194 2204 else:
2195 2205 textlen = len(rawtext)
2196 2206
2197 2207 if deltacomputer is None:
2198 2208 deltacomputer = deltautil.deltacomputer(self)
2199 2209
2200 2210 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2201 2211
2202 2212 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2203 2213
2204 2214 e = (
2205 2215 offset_type(offset, flags),
2206 2216 deltainfo.deltalen,
2207 2217 textlen,
2208 2218 deltainfo.base,
2209 2219 link,
2210 2220 p1r,
2211 2221 p2r,
2212 2222 node,
2213 2223 )
2214 2224 self.index.append(e)
2215 2225
2216 2226 # Reset the pure node cache start lookup offset to account for new
2217 2227 # revision.
2218 2228 if self._nodepos is not None:
2219 2229 self._nodepos = curr
2220 2230
2221 2231 entry = self._io.packentry(e, self.node, self.version, curr)
2222 2232 self._writeentry(
2223 2233 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2224 2234 )
2225 2235
2226 2236 rawtext = btext[0]
2227 2237
2228 2238 if alwayscache and rawtext is None:
2229 2239 rawtext = deltacomputer.buildtext(revinfo, fh)
2230 2240
2231 2241 if type(rawtext) == bytes: # only accept immutable objects
2232 2242 self._revisioncache = (node, curr, rawtext)
2233 2243 self._chainbasecache[curr] = deltainfo.chainbase
2234 2244 return node
2235 2245
2236 2246 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2237 2247 # Files opened in a+ mode have inconsistent behavior on various
2238 2248 # platforms. Windows requires that a file positioning call be made
2239 2249 # when the file handle transitions between reads and writes. See
2240 2250 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2241 2251 # platforms, Python or the platform itself can be buggy. Some versions
2242 2252 # of Solaris have been observed to not append at the end of the file
2243 2253 # if the file was seeked to before the end. See issue4943 for more.
2244 2254 #
2245 2255 # We work around this issue by inserting a seek() before writing.
2246 2256 # Note: This is likely not necessary on Python 3. However, because
2247 2257 # the file handle is reused for reads and may be seeked there, we need
2248 2258 # to be careful before changing this.
2249 2259 ifh.seek(0, os.SEEK_END)
2250 2260 if dfh:
2251 2261 dfh.seek(0, os.SEEK_END)
2252 2262
2253 2263 curr = len(self) - 1
2254 2264 if not self._inline:
2255 2265 transaction.add(self.datafile, offset)
2256 2266 transaction.add(self.indexfile, curr * len(entry))
2257 2267 if data[0]:
2258 2268 dfh.write(data[0])
2259 2269 dfh.write(data[1])
2260 2270 ifh.write(entry)
2261 2271 else:
2262 2272 offset += curr * self._io.size
2263 2273 transaction.add(self.indexfile, offset, curr)
2264 2274 ifh.write(entry)
2265 2275 ifh.write(data[0])
2266 2276 ifh.write(data[1])
2267 2277 self._enforceinlinesize(transaction, ifh)
2268 2278
2269 2279 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2270 2280 """
2271 2281 add a delta group
2272 2282
2273 2283 Given a set of deltas, add them to the revision log. The
2274 2284 first delta is against its parent, which should be in our
2275 2285 log; the rest are against the previous delta.
2276 2286
2277 2287 If ``addrevisioncb`` is defined, it will be called with arguments of
2278 2288 this revlog and the node that was added.
2279 2289 """
2280 2290
2281 2291 if self._writinghandles:
2282 2292 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2283 2293
2284 2294 nodes = []
2285 2295
2286 2296 r = len(self)
2287 2297 end = 0
2288 2298 if r:
2289 2299 end = self.end(r - 1)
2290 2300 ifh = self._indexfp(b"a+")
2291 2301 isize = r * self._io.size
2292 2302 if self._inline:
2293 2303 transaction.add(self.indexfile, end + isize, r)
2294 2304 dfh = None
2295 2305 else:
2296 2306 transaction.add(self.indexfile, isize, r)
2297 2307 transaction.add(self.datafile, end)
2298 2308 dfh = self._datafp(b"a+")
2299 2309
2300 2310 def flush():
2301 2311 if dfh:
2302 2312 dfh.flush()
2303 2313 ifh.flush()
2304 2314
2305 2315 self._writinghandles = (ifh, dfh)
2306 2316
2307 2317 try:
2308 2318 deltacomputer = deltautil.deltacomputer(self)
2309 2319 # loop through our set of deltas
2310 2320 for data in deltas:
2311 2321 node, p1, p2, linknode, deltabase, delta, flags = data
2312 2322 link = linkmapper(linknode)
2313 2323 flags = flags or REVIDX_DEFAULT_FLAGS
2314 2324
2315 2325 nodes.append(node)
2316 2326
2317 2327 if self.index.has_node(node):
2318 2328 self._nodeduplicatecallback(transaction, node)
2319 2329 # this can happen if two branches make the same change
2320 2330 continue
2321 2331
2322 2332 for p in (p1, p2):
2323 2333 if not self.index.has_node(p):
2324 2334 raise error.LookupError(
2325 2335 p, self.indexfile, _(b'unknown parent')
2326 2336 )
2327 2337
2328 2338 if not self.index.has_node(deltabase):
2329 2339 raise error.LookupError(
2330 2340 deltabase, self.indexfile, _(b'unknown delta base')
2331 2341 )
2332 2342
2333 2343 baserev = self.rev(deltabase)
2334 2344
2335 2345 if baserev != nullrev and self.iscensored(baserev):
2336 2346 # if base is censored, delta must be full replacement in a
2337 2347 # single patch operation
2338 2348 hlen = struct.calcsize(b">lll")
2339 2349 oldlen = self.rawsize(baserev)
2340 2350 newlen = len(delta) - hlen
2341 2351 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2342 2352 raise error.CensoredBaseError(
2343 2353 self.indexfile, self.node(baserev)
2344 2354 )
2345 2355
2346 2356 if not flags and self._peek_iscensored(baserev, delta, flush):
2347 2357 flags |= REVIDX_ISCENSORED
2348 2358
2349 2359 # We assume consumers of addrevisioncb will want to retrieve
2350 2360 # the added revision, which will require a call to
2351 2361 # revision(). revision() will fast path if there is a cache
2352 2362 # hit. So, we tell _addrevision() to always cache in this case.
2353 2363 # We're only using addgroup() in the context of changegroup
2354 2364 # generation so the revision data can always be handled as raw
2355 2365 # by the flagprocessor.
2356 2366 self._addrevision(
2357 2367 node,
2358 2368 None,
2359 2369 transaction,
2360 2370 link,
2361 2371 p1,
2362 2372 p2,
2363 2373 flags,
2364 2374 (baserev, delta),
2365 2375 ifh,
2366 2376 dfh,
2367 2377 alwayscache=bool(addrevisioncb),
2368 2378 deltacomputer=deltacomputer,
2369 2379 )
2370 2380
2371 2381 if addrevisioncb:
2372 2382 addrevisioncb(self, node)
2373 2383
2374 2384 if not dfh and not self._inline:
2375 2385 # addrevision switched from inline to conventional
2376 2386 # reopen the index
2377 2387 ifh.close()
2378 2388 dfh = self._datafp(b"a+")
2379 2389 ifh = self._indexfp(b"a+")
2380 2390 self._writinghandles = (ifh, dfh)
2381 2391 finally:
2382 2392 self._writinghandles = None
2383 2393
2384 2394 if dfh:
2385 2395 dfh.close()
2386 2396 ifh.close()
2387 2397
2388 2398 return nodes
2389 2399
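# Illustrative note on the expected input, derived from the loop above: each
# entry of ``deltas`` is a 7-tuple
#   (node, p1, p2, linknode, deltabase, delta, flags)
# where ``delta`` is a binary diff against ``deltabase``, which must already
# be known to the revlog (or added earlier in the same group).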
2390 2400 def iscensored(self, rev):
2391 2401 """Check if a file revision is censored."""
2392 2402 if not self._censorable:
2393 2403 return False
2394 2404
2395 2405 return self.flags(rev) & REVIDX_ISCENSORED
2396 2406
2397 2407 def _peek_iscensored(self, baserev, delta, flush):
2398 2408 """Quickly check if a delta produces a censored revision."""
2399 2409 if not self._censorable:
2400 2410 return False
2401 2411
2402 2412 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2403 2413
2404 2414 def getstrippoint(self, minlink):
2405 2415 """find the minimum rev that must be stripped to strip the linkrev
2406 2416
2407 2417 Returns a tuple containing the minimum rev and a set of all revs that
2408 2418 have linkrevs that will be broken by this strip.
2409 2419 """
2410 2420 return storageutil.resolvestripinfo(
2411 2421 minlink,
2412 2422 len(self) - 1,
2413 2423 self.headrevs(),
2414 2424 self.linkrev,
2415 2425 self.parentrevs,
2416 2426 )
2417 2427
2418 2428 def strip(self, minlink, transaction):
2419 2429 """truncate the revlog on the first revision with a linkrev >= minlink
2420 2430
2421 2431 This function is called when we're stripping revision minlink and
2422 2432 its descendants from the repository.
2423 2433
2424 2434 We have to remove all revisions with linkrev >= minlink, because
2425 2435 the equivalent changelog revisions will be renumbered after the
2426 2436 strip.
2427 2437
2428 2438 So we truncate the revlog on the first of these revisions, and
2429 2439 trust that the caller has saved the revisions that shouldn't be
2430 2440 removed and that it'll re-add them after this truncation.
2431 2441 """
2432 2442 if len(self) == 0:
2433 2443 return
2434 2444
2435 2445 rev, _ = self.getstrippoint(minlink)
2436 2446 if rev == len(self):
2437 2447 return
2438 2448
2439 2449 # first truncate the files on disk
2440 2450 end = self.start(rev)
2441 2451 if not self._inline:
2442 2452 transaction.add(self.datafile, end)
2443 2453 end = rev * self._io.size
2444 2454 else:
2445 2455 end += rev * self._io.size
2446 2456
2447 2457 transaction.add(self.indexfile, end)
2448 2458
2449 2459 # then reset internal state in memory to forget those revisions
2450 2460 self._revisioncache = None
2451 2461 self._chaininfocache = {}
2452 2462 self._chunkclear()
2453 2463
2454 2464 del self.index[rev:-1]
2455 2465 self._nodepos = None
2456 2466
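# Worked truncation example (illustrative): stripping at rev 10 of a
# non-inline revlog with 64-byte index entries truncates the data file at
# start(10) and the index file at 10 * 64 == 640 bytes; for an inline
# revlog, index and data share one file, so it is truncated at
# start(10) + 640 instead.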
2457 2467 def checksize(self):
2458 2468 """Check size of index and data files
2459 2469
2460 2470 return a (dd, di) tuple.
2461 2471 - dd: extra bytes for the "data" file
2462 2472 - di: extra bytes for the "index" file
2463 2473
2464 2474 A healthy revlog will return (0, 0).
2465 2475 """
2466 2476 expected = 0
2467 2477 if len(self):
2468 2478 expected = max(0, self.end(len(self) - 1))
2469 2479
2470 2480 try:
2471 2481 with self._datafp() as f:
2472 2482 f.seek(0, io.SEEK_END)
2473 2483 actual = f.tell()
2474 2484 dd = actual - expected
2475 2485 except IOError as inst:
2476 2486 if inst.errno != errno.ENOENT:
2477 2487 raise
2478 2488 dd = 0
2479 2489
2480 2490 try:
2481 2491 f = self.opener(self.indexfile)
2482 2492 f.seek(0, io.SEEK_END)
2483 2493 actual = f.tell()
2484 2494 f.close()
2485 2495 s = self._io.size
2486 2496 i = max(0, actual // s)
2487 2497 di = actual - (i * s)
2488 2498 if self._inline:
2489 2499 databytes = 0
2490 2500 for r in self:
2491 2501 databytes += max(0, self.length(r))
2492 2502 dd = 0
2493 2503 di = actual - len(self) * s - databytes
2494 2504 except IOError as inst:
2495 2505 if inst.errno != errno.ENOENT:
2496 2506 raise
2497 2507 di = 0
2498 2508
2499 2509 return (dd, di)
2500 2510
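# Interpretation sketch (illustrative): (0, 0) means both files match the
# index's expectations; dd > 0 means the data file carries trailing bytes
# beyond the last indexed revision, and di > 0 means the index file has
# extra bytes, e.g. a partially written entry left over after a crash.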
2501 2511 def files(self):
2502 2512 res = [self.indexfile]
2503 2513 if not self._inline:
2504 2514 res.append(self.datafile)
2505 2515 return res
2506 2516
2507 2517 def emitrevisions(
2508 2518 self,
2509 2519 nodes,
2510 2520 nodesorder=None,
2511 2521 revisiondata=False,
2512 2522 assumehaveparentrevisions=False,
2513 2523 deltamode=repository.CG_DELTAMODE_STD,
2514 2524 ):
2515 2525 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2516 2526 raise error.ProgrammingError(
2517 2527 b'unhandled value for nodesorder: %s' % nodesorder
2518 2528 )
2519 2529
2520 2530 if nodesorder is None and not self._generaldelta:
2521 2531 nodesorder = b'storage'
2522 2532
2523 2533 if (
2524 2534 not self._storedeltachains
2525 2535 and deltamode != repository.CG_DELTAMODE_PREV
2526 2536 ):
2527 2537 deltamode = repository.CG_DELTAMODE_FULL
2528 2538
2529 2539 return storageutil.emitrevisions(
2530 2540 self,
2531 2541 nodes,
2532 2542 nodesorder,
2533 2543 revlogrevisiondelta,
2534 2544 deltaparentfn=self.deltaparent,
2535 2545 candeltafn=self.candelta,
2536 2546 rawsizefn=self.rawsize,
2537 2547 revdifffn=self.revdiff,
2538 2548 flagsfn=self.flags,
2539 2549 deltamode=deltamode,
2540 2550 revisiondata=revisiondata,
2541 2551 assumehaveparentrevisions=assumehaveparentrevisions,
2542 2552 )
2543 2553
2544 2554 DELTAREUSEALWAYS = b'always'
2545 2555 DELTAREUSESAMEREVS = b'samerevs'
2546 2556 DELTAREUSENEVER = b'never'
2547 2557
2548 2558 DELTAREUSEFULLADD = b'fulladd'
2549 2559
2550 2560 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2551 2561
2552 2562 def clone(
2553 2563 self,
2554 2564 tr,
2555 2565 destrevlog,
2556 2566 addrevisioncb=None,
2557 2567 deltareuse=DELTAREUSESAMEREVS,
2558 2568 forcedeltabothparents=None,
2559 2569 sidedatacompanion=None,
2560 2570 ):
2561 2571 """Copy this revlog to another, possibly with format changes.
2562 2572
2563 2573 The destination revlog will contain the same revisions and nodes.
2564 2574 However, it may not be bit-for-bit identical due to e.g. delta encoding
2565 2575 differences.
2566 2576
2567 2577 The ``deltareuse`` argument controls how deltas from the existing revlog
2568 2578 are preserved in the destination revlog. The argument can have the
2569 2579 following values:
2570 2580
2571 2581 DELTAREUSEALWAYS
2572 2582 Deltas will always be reused (if possible), even if the destination
2573 2583 revlog would not select the same revisions for the delta. This is the
2574 2584 fastest mode of operation.
2575 2585 DELTAREUSESAMEREVS
2576 2586 Deltas will be reused if the destination revlog would pick the same
2577 2587 revisions for the delta. This mode strikes a balance between speed
2578 2588 and optimization.
2579 2589 DELTAREUSENEVER
2580 2590 Deltas will never be reused. This is the slowest mode of execution.
2581 2591 This mode can be used to recompute deltas (e.g. if the diff/delta
2582 2592 algorithm changes).
2583 2593 DELTAREUSEFULLADD
2584 2594 Revisions will be re-added as if they were new content. This is
2585 2595 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2586 2596 e.g. large file detection and handling.
2587 2597
2588 2598 Delta computation can be slow, so the choice of delta reuse policy can
2589 2599 significantly affect run time.
2590 2600
2591 2601 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2592 2602 two extremes. Deltas will be reused if they are appropriate. But if the
2593 2603 delta could choose a better revision, it will do so. This means if you
2594 2604 are converting a non-generaldelta revlog to a generaldelta revlog,
2595 2605 deltas will be recomputed if the delta's parent isn't a parent of the
2596 2606 revision.
2597 2607
2598 2608 In addition to the delta policy, the ``forcedeltabothparents``
2599 2609 argument controls whether to force computing deltas against both parents
2600 2610 for merges. If None, the destination revlog's current setting is used.
2601 2611
2602 2612 If not None, `sidedatacompanion` is a callable that accepts two
2603 2613 arguments:
2604 2614
2605 2615 (srcrevlog, rev)
2606 2616
2607 2617 and returns a triplet that controls changes to sidedata content from
2608 2618 the old revision to the new clone result:
2609 2619
2610 2620 (dropall, filterout, update)
2611 2621
2612 2622 * if `dropall` is True, all sidedata should be dropped
2613 2623 * `filterout` is a set of sidedata keys that should be dropped
2614 2624 * `update` is a mapping of additional/new key -> value
2615 2625 """
2616 2626 if deltareuse not in self.DELTAREUSEALL:
2617 2627 raise ValueError(
2618 2628 _(b'value for deltareuse invalid: %s') % deltareuse
2619 2629 )
2620 2630
2621 2631 if len(destrevlog):
2622 2632 raise ValueError(_(b'destination revlog is not empty'))
2623 2633
2624 2634 if getattr(self, 'filteredrevs', None):
2625 2635 raise ValueError(_(b'source revlog has filtered revisions'))
2626 2636 if getattr(destrevlog, 'filteredrevs', None):
2627 2637 raise ValueError(_(b'destination revlog has filtered revisions'))
2628 2638
2629 2639 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2630 2640 # if possible.
2631 2641 oldlazydelta = destrevlog._lazydelta
2632 2642 oldlazydeltabase = destrevlog._lazydeltabase
2633 2643 oldamd = destrevlog._deltabothparents
2634 2644
2635 2645 try:
2636 2646 if deltareuse == self.DELTAREUSEALWAYS:
2637 2647 destrevlog._lazydeltabase = True
2638 2648 destrevlog._lazydelta = True
2639 2649 elif deltareuse == self.DELTAREUSESAMEREVS:
2640 2650 destrevlog._lazydeltabase = False
2641 2651 destrevlog._lazydelta = True
2642 2652 elif deltareuse == self.DELTAREUSENEVER:
2643 2653 destrevlog._lazydeltabase = False
2644 2654 destrevlog._lazydelta = False
2645 2655
2646 2656 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2647 2657
2648 2658 self._clone(
2649 2659 tr,
2650 2660 destrevlog,
2651 2661 addrevisioncb,
2652 2662 deltareuse,
2653 2663 forcedeltabothparents,
2654 2664 sidedatacompanion,
2655 2665 )
2656 2666
2657 2667 finally:
2658 2668 destrevlog._lazydelta = oldlazydelta
2659 2669 destrevlog._lazydeltabase = oldlazydeltabase
2660 2670 destrevlog._deltabothparents = oldamd
2661 2671
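# Hedged usage sketch (file names assumed, mirroring censorrevision()
# below): a format upgrade typically clones into a fresh, empty revlog and
# swaps the files afterwards, e.g.:
#   newrl = revlog(self.opener, self.indexfile + b'.new')
#   self.clone(tr, newrl, deltareuse=self.DELTAREUSESAMEREVS)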
2662 2672 def _clone(
2663 2673 self,
2664 2674 tr,
2665 2675 destrevlog,
2666 2676 addrevisioncb,
2667 2677 deltareuse,
2668 2678 forcedeltabothparents,
2669 2679 sidedatacompanion,
2670 2680 ):
2671 2681 """perform the core duty of `revlog.clone` after parameter processing"""
2672 2682 deltacomputer = deltautil.deltacomputer(destrevlog)
2673 2683 index = self.index
2674 2684 for rev in self:
2675 2685 entry = index[rev]
2676 2686
2677 2687 # Some classes override linkrev to take filtered revs into
2678 2688 # account. Use raw entry from index.
2679 2689 flags = entry[0] & 0xFFFF
2680 2690 linkrev = entry[4]
2681 2691 p1 = index[entry[5]][7]
2682 2692 p2 = index[entry[6]][7]
2683 2693 node = entry[7]
2684 2694
2685 2695 sidedataactions = (False, [], {})
2686 2696 if sidedatacompanion is not None:
2687 2697 sidedataactions = sidedatacompanion(self, rev)
2688 2698
2689 2699 # (Possibly) reuse the delta from the revlog if allowed and
2690 2700 # the revlog chunk is a delta.
2691 2701 cachedelta = None
2692 2702 rawtext = None
2693 2703 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2694 2704 dropall, filterout, update = sidedataactions
2695 2705 text, sidedata = self._revisiondata(rev)
2696 2706 if dropall:
2697 2707 sidedata = {}
2698 2708 for key in filterout:
2699 2709 sidedata.pop(key, None)
2700 2710 sidedata.update(update)
2701 2711 if not sidedata:
2702 2712 sidedata = None
2703 2713 destrevlog.addrevision(
2704 2714 text,
2705 2715 tr,
2706 2716 linkrev,
2707 2717 p1,
2708 2718 p2,
2709 2719 cachedelta=cachedelta,
2710 2720 node=node,
2711 2721 flags=flags,
2712 2722 deltacomputer=deltacomputer,
2713 2723 sidedata=sidedata,
2714 2724 )
2715 2725 else:
2716 2726 if destrevlog._lazydelta:
2717 2727 dp = self.deltaparent(rev)
2718 2728 if dp != nullrev:
2719 2729 cachedelta = (dp, bytes(self._chunk(rev)))
2720 2730
2721 2731 if not cachedelta:
2722 2732 rawtext = self.rawdata(rev)
2723 2733
2724 2734 ifh = destrevlog.opener(
2725 2735 destrevlog.indexfile, b'a+', checkambig=False
2726 2736 )
2727 2737 dfh = None
2728 2738 if not destrevlog._inline:
2729 2739 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2730 2740 try:
2731 2741 destrevlog._addrevision(
2732 2742 node,
2733 2743 rawtext,
2734 2744 tr,
2735 2745 linkrev,
2736 2746 p1,
2737 2747 p2,
2738 2748 flags,
2739 2749 cachedelta,
2740 2750 ifh,
2741 2751 dfh,
2742 2752 deltacomputer=deltacomputer,
2743 2753 )
2744 2754 finally:
2745 2755 if dfh:
2746 2756 dfh.close()
2747 2757 ifh.close()
2748 2758
2749 2759 if addrevisioncb:
2750 2760 addrevisioncb(self, rev, node)
2751 2761
2752 2762 def censorrevision(self, tr, censornode, tombstone=b''):
2753 2763 if (self.version & 0xFFFF) == REVLOGV0:
2754 2764 raise error.RevlogError(
2755 2765 _(b'cannot censor with version %d revlogs') % self.version
2756 2766 )
2757 2767
2758 2768 censorrev = self.rev(censornode)
2759 2769 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2760 2770
2761 2771 if len(tombstone) > self.rawsize(censorrev):
2762 2772 raise error.Abort(
2763 2773 _(b'censor tombstone must be no longer than censored data')
2764 2774 )
2765 2775
2766 2776 # Rewriting the revlog in place is hard. Our strategy for censoring is
2767 2777 # to create a new revlog, copy all revisions to it, then replace the
2768 2778 # revlogs on transaction close.
2769 2779
2770 2780 newindexfile = self.indexfile + b'.tmpcensored'
2771 2781 newdatafile = self.datafile + b'.tmpcensored'
2772 2782
2773 2783 # This is a bit dangerous. We could easily have a mismatch of state.
2774 2784 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2775 2785 newrl.version = self.version
2776 2786 newrl._generaldelta = self._generaldelta
2777 2787 newrl._io = self._io
2778 2788
2779 2789 for rev in self.revs():
2780 2790 node = self.node(rev)
2781 2791 p1, p2 = self.parents(node)
2782 2792
2783 2793 if rev == censorrev:
2784 2794 newrl.addrawrevision(
2785 2795 tombstone,
2786 2796 tr,
2787 2797 self.linkrev(censorrev),
2788 2798 p1,
2789 2799 p2,
2790 2800 censornode,
2791 2801 REVIDX_ISCENSORED,
2792 2802 )
2793 2803
2794 2804 if newrl.deltaparent(rev) != nullrev:
2795 2805 raise error.Abort(
2796 2806 _(
2797 2807 b'censored revision stored as delta; '
2798 2808 b'cannot censor'
2799 2809 ),
2800 2810 hint=_(
2801 2811 b'censoring of revlogs is not '
2802 2812 b'fully implemented; please report '
2803 2813 b'this bug'
2804 2814 ),
2805 2815 )
2806 2816 continue
2807 2817
2808 2818 if self.iscensored(rev):
2809 2819 if self.deltaparent(rev) != nullrev:
2810 2820 raise error.Abort(
2811 2821 _(
2812 2822 b'cannot censor due to censored '
2813 2823 b'revision having delta stored'
2814 2824 )
2815 2825 )
2816 2826 rawtext = self._chunk(rev)
2817 2827 else:
2818 2828 rawtext = self.rawdata(rev)
2819 2829
2820 2830 newrl.addrawrevision(
2821 2831 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2822 2832 )
2823 2833
2824 2834 tr.addbackup(self.indexfile, location=b'store')
2825 2835 if not self._inline:
2826 2836 tr.addbackup(self.datafile, location=b'store')
2827 2837
2828 2838 self.opener.rename(newrl.indexfile, self.indexfile)
2829 2839 if not self._inline:
2830 2840 self.opener.rename(newrl.datafile, self.datafile)
2831 2841
2832 2842 self.clearcaches()
2833 2843 self._loadindex()
2834 2844
2835 2845 def verifyintegrity(self, state):
2836 2846 """Verifies the integrity of the revlog.
2837 2847
2838 2848 Yields ``revlogproblem`` instances describing problems that are
2839 2849 found.
2840 2850 """
2841 2851 dd, di = self.checksize()
2842 2852 if dd:
2843 2853 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2844 2854 if di:
2845 2855 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2846 2856
2847 2857 version = self.version & 0xFFFF
2848 2858
2849 2859 # The verifier tells us what version revlog we should be.
2850 2860 if version != state[b'expectedversion']:
2851 2861 yield revlogproblem(
2852 2862 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2853 2863 % (self.indexfile, version, state[b'expectedversion'])
2854 2864 )
2855 2865
2856 2866 state[b'skipread'] = set()
2857 2867
2858 2868 for rev in self:
2859 2869 node = self.node(rev)
2860 2870
2861 2871 # Verify contents. 4 cases to care about:
2862 2872 #
2863 2873 # common: the most common case
2864 2874 # rename: with a rename
2865 2875 # meta: file content starts with b'\1\n', the metadata
2866 2876 # header defined in filelog.py, but without a rename
2867 2877 # ext: content stored externally
2868 2878 #
2869 2879 # More formally, their differences are shown below:
2870 2880 #
2871 2881 # | common | rename | meta | ext
2872 2882 # -------------------------------------------------------
2873 2883 # flags() | 0 | 0 | 0 | not 0
2874 2884 # renamed() | False | True | False | ?
2875 2885 # rawtext[0:2]=='\1\n'| False | True | True | ?
2876 2886 #
2877 2887 # "rawtext" means the raw text stored in revlog data, which
2878 2888 # could be retrieved by "rawdata(rev)". "text"
2879 2889 # mentioned below is "revision(rev)".
2880 2890 #
2881 2891 # There are 3 different lengths stored physically:
2882 2892 # 1. L1: rawsize, stored in revlog index
2883 2893 # 2. L2: len(rawtext), stored in revlog data
2884 2894 # 3. L3: len(text), stored in revlog data if flags==0, or
2885 2895 # possibly somewhere else if flags!=0
2886 2896 #
2887 2897 # L1 should be equal to L2. L3 could be different from them.
2888 2898 # "text" may or may not affect commit hash depending on flag
2889 2899 # processors (see flagutil.addflagprocessor).
2890 2900 #
2891 2901 # | common | rename | meta | ext
2892 2902 # -------------------------------------------------
2893 2903 # rawsize() | L1 | L1 | L1 | L1
2894 2904 # size() | L1 | L2-LM | L1(*) | L1 (?)
2895 2905 # len(rawtext) | L2 | L2 | L2 | L2
2896 2906 # len(text) | L2 | L2 | L2 | L3
2897 2907 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2898 2908 #
2899 2909 # LM: length of metadata, depending on rawtext
2900 2910 # (*): not ideal, see comment in filelog.size
2901 2911 # (?): could be "- len(meta)" if the resolved content has
2902 2912 # rename metadata
2903 2913 #
2904 2914 # Checks needed to be done:
2905 2915 # 1. length check: L1 == L2, in all cases.
2906 2916 # 2. hash check: depending on flag processor, we may need to
2907 2917 # use either "text" (external), or "rawtext" (in revlog).
2908 2918
2909 2919 try:
2910 2920 skipflags = state.get(b'skipflags', 0)
2911 2921 if skipflags:
2912 2922 skipflags &= self.flags(rev)
2913 2923
2914 2924 if skipflags:
2915 2925 state[b'skipread'].add(node)
2916 2926 else:
2917 2927 # Side-effect: read content and verify hash.
2918 2928 self.revision(node)
2919 2929
2920 2930 l1 = self.rawsize(rev)
2921 2931 l2 = len(self.rawdata(node))
2922 2932
2923 2933 if l1 != l2:
2924 2934 yield revlogproblem(
2925 2935 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2926 2936 node=node,
2927 2937 )
2928 2938
2929 2939 except error.CensoredNodeError:
2930 2940 if state[b'erroroncensored']:
2931 2941 yield revlogproblem(
2932 2942 error=_(b'censored file data'), node=node
2933 2943 )
2934 2944 state[b'skipread'].add(node)
2935 2945 except Exception as e:
2936 2946 yield revlogproblem(
2937 2947 error=_(b'unpacking %s: %s')
2938 2948 % (short(node), stringutil.forcebytestr(e)),
2939 2949 node=node,
2940 2950 )
2941 2951 state[b'skipread'].add(node)
2942 2952
2943 2953 def storageinfo(
2944 2954 self,
2945 2955 exclusivefiles=False,
2946 2956 sharedfiles=False,
2947 2957 revisionscount=False,
2948 2958 trackedsize=False,
2949 2959 storedsize=False,
2950 2960 ):
2951 2961 d = {}
2952 2962
2953 2963 if exclusivefiles:
2954 2964 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2955 2965 if not self._inline:
2956 2966 d[b'exclusivefiles'].append((self.opener, self.datafile))
2957 2967
2958 2968 if sharedfiles:
2959 2969 d[b'sharedfiles'] = []
2960 2970
2961 2971 if revisionscount:
2962 2972 d[b'revisionscount'] = len(self)
2963 2973
2964 2974 if trackedsize:
2965 2975 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2966 2976
2967 2977 if storedsize:
2968 2978 d[b'storedsize'] = sum(
2969 2979 self.opener.stat(path).st_size for path in self.files()
2970 2980 )
2971 2981
2972 2982 return d