rust: clean remains of direct-ffi code...
marmoute
r44975:fb16ad36 default
mercurial/ancestor.py
@@ -1,431 +1,395
1 1 # ancestor.py - generic DAG ancestor algorithm for mercurial
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from .node import nullrev
13 13 from . import (
14 14 dagop,
15 15 policy,
16 16 pycompat,
17 17 )
18 18
19 19 parsers = policy.importmod('parsers')
20 20
21 21
22 22 def commonancestorsheads(pfunc, *nodes):
23 23 """Returns a set with the heads of all common ancestors of all nodes,
24 24 heads(::nodes[0] and ::nodes[1] and ...) .
25 25
26 26 pfunc must return a list of parent vertices for a given vertex.
27 27 """
28 28 if not isinstance(nodes, set):
29 29 nodes = set(nodes)
30 30 if nullrev in nodes:
31 31 return set()
32 32 if len(nodes) <= 1:
33 33 return nodes
34 34
35 35 allseen = (1 << len(nodes)) - 1
36 36 seen = [0] * (max(nodes) + 1)
37 37 for i, n in enumerate(nodes):
38 38 seen[n] = 1 << i
39 39 poison = 1 << (i + 1)
40 40
41 41 gca = set()
42 42 interesting = len(nodes)
43 43 nv = len(seen) - 1
44 44 while nv >= 0 and interesting:
45 45 v = nv
46 46 nv -= 1
47 47 if not seen[v]:
48 48 continue
49 49 sv = seen[v]
50 50 if sv < poison:
51 51 interesting -= 1
52 52 if sv == allseen:
53 53 gca.add(v)
54 54 sv |= poison
55 55 if v in nodes:
56 56 # history is linear
57 57 return {v}
58 58 if sv < poison:
59 59 for p in pfunc(v):
60 60 sp = seen[p]
61 61 if p == nullrev:
62 62 continue
63 63 if sp == 0:
64 64 seen[p] = sv
65 65 interesting += 1
66 66 elif sp != sv:
67 67 seen[p] |= sv
68 68 else:
69 69 for p in pfunc(v):
70 70 if p == nullrev:
71 71 continue
72 72 sp = seen[p]
73 73 if sp and sp < poison:
74 74 interesting -= 1
75 75 seen[p] = sv
76 76 return gca
77 77
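# --- Editor's illustration (not part of this changeset): exercising
# commonancestorsheads on a hand-built diamond DAG. The dict-backed
# pfunc is a hypothetical stand-in for changelog.parentrevs.
parents = {0: [-1], 1: [0], 2: [0], 3: [1, 2]}
pfunc = parents.__getitem__
# 1 and 2 are siblings; the lone head of their common ancestors is 0.
assert commonancestorsheads(pfunc, 1, 2) == {0}
# when one argument is an ancestor of the other, the linear-history
# shortcut returns it directly.
assert commonancestorsheads(pfunc, 0, 1) == {0}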
78 78
79 79 def ancestors(pfunc, *orignodes):
80 80 """
81 81 Returns the common ancestors of a and b that are furthest from a
82 82 root (as measured by longest path).
83 83
84 84 pfunc must return a list of parent vertices for a given vertex.
85 85 """
86 86
87 87 def deepest(nodes):
88 88 interesting = {}
89 89 count = max(nodes) + 1
90 90 depth = [0] * count
91 91 seen = [0] * count
92 92 mapping = []
93 93 for (i, n) in enumerate(sorted(nodes)):
94 94 depth[n] = 1
95 95 b = 1 << i
96 96 seen[n] = b
97 97 interesting[b] = 1
98 98 mapping.append((b, n))
99 99 nv = count - 1
100 100 while nv >= 0 and len(interesting) > 1:
101 101 v = nv
102 102 nv -= 1
103 103 dv = depth[v]
104 104 if dv == 0:
105 105 continue
106 106 sv = seen[v]
107 107 for p in pfunc(v):
108 108 if p == nullrev:
109 109 continue
110 110 dp = depth[p]
111 111 sp = seen[p]
112 112 if dp <= dv:
113 113 depth[p] = dv + 1
114 114 if sp != sv:
115 115 interesting[sv] += 1
116 116 seen[p] = sv
117 117 if sp:
118 118 interesting[sp] -= 1
119 119 if interesting[sp] == 0:
120 120 del interesting[sp]
121 121 elif dv == dp - 1:
122 122 nsp = sp | sv
123 123 if nsp == sp:
124 124 continue
125 125 seen[p] = nsp
126 126 interesting.setdefault(nsp, 0)
127 127 interesting[nsp] += 1
128 128 interesting[sp] -= 1
129 129 if interesting[sp] == 0:
130 130 del interesting[sp]
131 131 interesting[sv] -= 1
132 132 if interesting[sv] == 0:
133 133 del interesting[sv]
134 134
135 135 if len(interesting) != 1:
136 136 return []
137 137
138 138 k = 0
139 139 for i in interesting:
140 140 k |= i
141 141 return {n for (i, n) in mapping if k & i}
142 142
143 143 gca = commonancestorsheads(pfunc, *orignodes)
144 144
145 145 if len(gca) <= 1:
146 146 return gca
147 147 return deepest(gca)
148 148
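# --- Editor's illustration (not part of this changeset): on the diamond
# DAG used above, commonancestorsheads finds the single head {0}, so
# ancestors() returns it via the len(gca) <= 1 fast path; deepest() only
# runs when several incomparable heads survive.
parents = {0: [-1], 1: [0], 2: [0], 3: [1, 2]}
assert ancestors(parents.__getitem__, 1, 2) == {0}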
149 149
150 150 class incrementalmissingancestors(object):
151 151 '''persistent state used to calculate missing ancestors incrementally
152 152
153 153 Although similar in spirit to lazyancestors below, this is a separate class
154 154 because trying to support contains and missingancestors operations with the
155 155 same internal data structures adds needless complexity.'''
156 156
157 157 def __init__(self, pfunc, bases):
158 158 self.bases = set(bases)
159 159 if not self.bases:
160 160 self.bases.add(nullrev)
161 161 self.pfunc = pfunc
162 162
163 163 def hasbases(self):
164 164 '''whether the common set has any non-trivial bases'''
165 165 return self.bases and self.bases != {nullrev}
166 166
167 167 def addbases(self, newbases):
168 168 '''grow the ancestor set by adding new bases'''
169 169 self.bases.update(newbases)
170 170
171 171 def basesheads(self):
172 172 return dagop.headrevs(self.bases, self.pfunc)
173 173
174 174 def removeancestorsfrom(self, revs):
175 175 '''remove all ancestors of bases from the set revs (in place)'''
176 176 bases = self.bases
177 177 pfunc = self.pfunc
178 178 revs.difference_update(bases)
179 179 # nullrev is always an ancestor
180 180 revs.discard(nullrev)
181 181 if not revs:
182 182 return
183 183 # anything in revs > start is definitely not an ancestor of bases
184 184 # revs <= start needs to be investigated
185 185 start = max(bases)
186 186 keepcount = sum(1 for r in revs if r > start)
187 187 if len(revs) == keepcount:
188 188 # no revs to consider
189 189 return
190 190
191 191 for curr in pycompat.xrange(start, min(revs) - 1, -1):
192 192 if curr not in bases:
193 193 continue
194 194 revs.discard(curr)
195 195 bases.update(pfunc(curr))
196 196 if len(revs) == keepcount:
197 197 # no more potential revs to discard
198 198 break
199 199
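# --- Editor's illustration (not part of this changeset): pruning a rev
# set in place on the diamond DAG. Rev 0 is an ancestor of base 1 and is
# dropped; 2 and 3 are not ancestors of any base and survive.
parents = {0: [-1], 1: [0], 2: [0], 3: [1, 2]}
inc = incrementalmissingancestors(parents.__getitem__, [1])
revs = {0, 2, 3}
inc.removeancestorsfrom(revs)
assert revs == {2, 3}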
200 200 def missingancestors(self, revs):
201 201 '''return all the ancestors of revs that are not ancestors of self.bases
202 202
203 203 This may include elements from revs.
204 204
205 205 Equivalent to the revset (::revs - ::self.bases). Revs are returned in
206 206 revision number order, which is a topological order.'''
207 207 revsvisit = set(revs)
208 208 basesvisit = self.bases
209 209 pfunc = self.pfunc
210 210 bothvisit = revsvisit.intersection(basesvisit)
211 211 revsvisit.difference_update(bothvisit)
212 212 if not revsvisit:
213 213 return []
214 214
215 215 start = max(max(revsvisit), max(basesvisit))
216 216 # At this point, we hold the invariants that:
217 217 # - revsvisit is the set of nodes we know are an ancestor of at least
218 218 # one of the nodes in revs
219 219 # - basesvisit is the same for bases
220 220 # - bothvisit is the set of nodes we know are ancestors of at least one
221 221 # of the nodes in revs and one of the nodes in bases. bothvisit and
222 222 # revsvisit are mutually exclusive, but bothvisit is a subset of
223 223 # basesvisit.
224 224 # Now we walk down in reverse topo order, adding parents of nodes
225 225 # already visited to the sets while maintaining the invariants. When a
226 226 # node is found in both revsvisit and basesvisit, it is removed from
227 227 # revsvisit and added to bothvisit. When revsvisit becomes empty, there
228 228 # are no more ancestors of revs that aren't also ancestors of bases, so
229 229 # exit.
230 230
231 231 missing = []
232 232 for curr in pycompat.xrange(start, nullrev, -1):
233 233 if not revsvisit:
234 234 break
235 235
236 236 if curr in bothvisit:
237 237 bothvisit.remove(curr)
238 238 # curr's parents might have made it into revsvisit through
239 239 # another path
240 240 for p in pfunc(curr):
241 241 revsvisit.discard(p)
242 242 basesvisit.add(p)
243 243 bothvisit.add(p)
244 244 continue
245 245
246 246 if curr in revsvisit:
247 247 missing.append(curr)
248 248 revsvisit.remove(curr)
249 249 thisvisit = revsvisit
250 250 othervisit = basesvisit
251 251 elif curr in basesvisit:
252 252 thisvisit = basesvisit
253 253 othervisit = revsvisit
254 254 else:
255 255 # not an ancestor of revs or bases: ignore
256 256 continue
257 257
258 258 for p in pfunc(curr):
259 259 if p == nullrev:
260 260 pass
261 261 elif p in othervisit or p in bothvisit:
262 262 # p is implicitly in thisvisit. This means p is or should be
263 263 # in bothvisit
264 264 revsvisit.discard(p)
265 265 basesvisit.add(p)
266 266 bothvisit.add(p)
267 267 else:
268 268 # visit later
269 269 thisvisit.add(p)
270 270
271 271 missing.reverse()
272 272 return missing
273 273
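# --- Editor's illustration (not part of this changeset): on the same
# diamond, the ancestors of 3 that are not ancestors of base 1 are 2 and
# 3 itself, returned in revision-number (topological) order.
parents = {0: [-1], 1: [0], 2: [0], 3: [1, 2]}
inc = incrementalmissingancestors(parents.__getitem__, [1])
assert inc.missingancestors([3]) == [2, 3]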
274 274
275 275 # Extracted from lazyancestors.__iter__ to avoid a reference cycle
276 276 def _lazyancestorsiter(parentrevs, initrevs, stoprev, inclusive):
277 277 seen = {nullrev}
278 278 heappush = heapq.heappush
279 279 heappop = heapq.heappop
280 280 heapreplace = heapq.heapreplace
281 281 see = seen.add
282 282
283 283 if inclusive:
284 284 visit = [-r for r in initrevs]
285 285 seen.update(initrevs)
286 286 heapq.heapify(visit)
287 287 else:
288 288 visit = []
289 289 heapq.heapify(visit)
290 290 for r in initrevs:
291 291 p1, p2 = parentrevs(r)
292 292 if p1 not in seen:
293 293 heappush(visit, -p1)
294 294 see(p1)
295 295 if p2 not in seen:
296 296 heappush(visit, -p2)
297 297 see(p2)
298 298
299 299 while visit:
300 300 current = -visit[0]
301 301 if current < stoprev:
302 302 break
303 303 yield current
304 304 # optimize out heapq operation if p1 is known to be the next highest
305 305 # revision, which is quite common in linear history.
306 306 p1, p2 = parentrevs(current)
307 307 if p1 not in seen:
308 308 if current - p1 == 1:
309 309 visit[0] = -p1
310 310 else:
311 311 heapreplace(visit, -p1)
312 312 see(p1)
313 313 else:
314 314 heappop(visit)
315 315 if p2 not in seen:
316 316 heappush(visit, -p2)
317 317 see(p2)
318 318
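# --- Editor's illustration (not part of this changeset): parentrevs must
# return a (p1, p2) pair padded with nullrev. Walking from rev 3 with
# inclusive=False yields its ancestors in decreasing revision order.
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
it = _lazyancestorsiter(parents.__getitem__, [3], 0, False)
assert list(it) == [2, 1, 0]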
319 319
320 320 class lazyancestors(object):
321 321 def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
322 322 """Create a new object generating ancestors for the given revs. Does
323 323 not generate revs lower than stoprev.
324 324
325 325 This is computed lazily starting from revs. The object supports
326 326 iteration and membership.
327 327
328 328 pfunc should return the parents of a given revision; revs should be
329 329 an iterable. inclusive is a boolean that indicates whether revs
330 330 should be included. Revs lower than stoprev will not be generated.
331 331
332 332 Result does not include the null revision."""
333 333 self._parentrevs = pfunc
334 334 self._initrevs = [r for r in revs if r >= stoprev]
335 335 self._stoprev = stoprev
336 336 self._inclusive = inclusive
337 337
338 338 self._containsseen = set()
339 339 self._containsiter = _lazyancestorsiter(
340 340 self._parentrevs, self._initrevs, self._stoprev, self._inclusive
341 341 )
342 342
343 343 def __nonzero__(self):
344 344 """False if the set is empty, True otherwise."""
345 345 try:
346 346 next(iter(self))
347 347 return True
348 348 except StopIteration:
349 349 return False
350 350
351 351 __bool__ = __nonzero__
352 352
353 353 def __iter__(self):
354 354 """Generate the ancestors of _initrevs in reverse topological order.
355 355
356 356 If inclusive is False, yield a sequence of revision numbers starting
357 357 with the parents of each revision in revs, i.e., each revision is
358 358 *not* considered an ancestor of itself. Results are emitted in reverse
359 359 revision number order. That order is also topological: a child is
360 360 always emitted before its parent.
361 361
362 362 If inclusive is True, the source revisions are also yielded. The
363 363 reverse revision number order is still enforced."""
364 364 return _lazyancestorsiter(
365 365 self._parentrevs, self._initrevs, self._stoprev, self._inclusive
366 366 )
367 367
368 368 def __contains__(self, target):
369 369 """Test whether target is an ancestor of self._initrevs."""
370 370 seen = self._containsseen
371 371 if target in seen:
372 372 return True
373 373 iter = self._containsiter
374 374 if iter is None:
375 375 # Iterator exhausted
376 376 return False
377 377 # Only an integer target is valid, but some callers expect
378 378 # 'None in self' to be False, so we handle None explicitly.
379 379 if target is None:
380 380 return False
381 381
382 382 see = seen.add
383 383 try:
384 384 while True:
385 385 rev = next(iter)
386 386 see(rev)
387 387 if rev == target:
388 388 return True
389 389 if rev < target:
390 390 return False
391 391 except StopIteration:
392 392 # Set to None to indicate fast-path can be used next time, and to
393 393 # free up memory.
394 394 self._containsiter = None
395 395 return False
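# --- Editor's illustration (not part of this changeset): membership
# tests consume the internal iterator lazily, caching revs already seen.
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
la = lazyancestors(parents.__getitem__, [3])
assert 2 in la  # an ancestor of 3
assert 3 not in la  # inclusive=False: 3 is not its own ancestor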
396
397
398 class rustlazyancestors(object):
399 def __init__(self, index, revs, stoprev=0, inclusive=False):
400 self._index = index
401 self._stoprev = stoprev
402 self._inclusive = inclusive
403 # no need to prefilter out init revs that are smaller than stoprev,
404 # it's done by rustlazyancestors constructor.
405 # we need to convert to a list, because our rustlazyancestors
406 # constructor (from C code) doesn't understand anything else yet
407 self._initrevs = initrevs = list(revs)
408
409 self._containsiter = parsers.rustlazyancestors(
410 index, initrevs, stoprev, inclusive
411 )
412
413 def __nonzero__(self):
414 """False if the set is empty, True otherwise.
415
416 It's better to duplicate this essentially trivial method than
417 to subclass lazyancestors
418 """
419 try:
420 next(iter(self))
421 return True
422 except StopIteration:
423 return False
424
425 def __iter__(self):
426 return parsers.rustlazyancestors(
427 self._index, self._initrevs, self._stoprev, self._inclusive
428 )
429
430 def __contains__(self, target):
431 return target in self._containsiter
mercurial/revlog.py
@@ -1,3038 +1,3035
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 nodemap as nodemaputil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanked usage of all the names to prevent pyflakes constraints
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109 rustrevlog = policy.importrust('revlog')
110 110
111 111 # Aliased for performance.
112 112 _zlibdecompress = zlib.decompress
113 113
114 114 # max size of revlog with inline data
115 115 _maxinline = 131072
116 116 _chunksize = 1048576
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False, {}
121 121
122 122
123 123 def ellipsiswriteprocessor(rl, text, sidedata):
124 124 return text, False
125 125
126 126
127 127 def ellipsisrawprocessor(rl, text):
128 128 return False
129 129
130 130
131 131 ellipsisprocessor = (
132 132 ellipsisreadprocessor,
133 133 ellipsiswriteprocessor,
134 134 ellipsisrawprocessor,
135 135 )
136 136
137 137
138 138 def getoffset(q):
139 139 return int(q >> 16)
140 140
141 141
142 142 def gettype(q):
143 143 return int(q & 0xFFFF)
144 144
145 145
146 146 def offset_type(offset, type):
147 147 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 148 raise ValueError(b'unknown revlog index flags')
149 149 return int(int(offset) << 16 | type)
150 150
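# --- Editor's illustration (not part of this changeset): the first field
# of an index entry packs a 48-bit offset above 16 bits of flags.
q = offset_type(4096, 0)  # flag bits must lie within REVIDX_KNOWN_FLAGS
assert getoffset(q) == 4096
assert gettype(q) == 0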
151 151
152 152 def _verify_revision(rl, skipflags, state, node):
153 153 """Verify the integrity of the given revlog ``node`` while providing a hook
154 154 point for extensions to influence the operation."""
155 155 if skipflags:
156 156 state[b'skipread'].add(node)
157 157 else:
158 158 # Side-effect: read content and verify hash.
159 159 rl.revision(node)
160 160
161 161
162 162 @attr.s(slots=True, frozen=True)
163 163 class _revisioninfo(object):
164 164 """Information about a revision that allows building its fulltext
165 165 node: expected hash of the revision
166 166 p1, p2: parent revs of the revision
167 167 btext: built text cache consisting of a one-element list
168 168 cachedelta: (baserev, uncompressed_delta) or None
169 169 flags: flags associated to the revision storage
170 170
171 171 One of btext[0] or cachedelta must be set.
172 172 """
173 173
174 174 node = attr.ib()
175 175 p1 = attr.ib()
176 176 p2 = attr.ib()
177 177 btext = attr.ib()
178 178 textlen = attr.ib()
179 179 cachedelta = attr.ib()
180 180 flags = attr.ib()
181 181
182 182
183 183 @interfaceutil.implementer(repository.irevisiondelta)
184 184 @attr.s(slots=True)
185 185 class revlogrevisiondelta(object):
186 186 node = attr.ib()
187 187 p1node = attr.ib()
188 188 p2node = attr.ib()
189 189 basenode = attr.ib()
190 190 flags = attr.ib()
191 191 baserevisionsize = attr.ib()
192 192 revision = attr.ib()
193 193 delta = attr.ib()
194 194 linknode = attr.ib(default=None)
195 195
196 196
197 197 @interfaceutil.implementer(repository.iverifyproblem)
198 198 @attr.s(frozen=True)
199 199 class revlogproblem(object):
200 200 warning = attr.ib(default=None)
201 201 error = attr.ib(default=None)
202 202 node = attr.ib(default=None)
203 203
204 204
205 205 # index v0:
206 206 # 4 bytes: offset
207 207 # 4 bytes: compressed length
208 208 # 4 bytes: base rev
209 209 # 4 bytes: link rev
210 210 # 20 bytes: parent 1 nodeid
211 211 # 20 bytes: parent 2 nodeid
212 212 # 20 bytes: nodeid
213 213 indexformatv0 = struct.Struct(b">4l20s20s20s")
214 214 indexformatv0_pack = indexformatv0.pack
215 215 indexformatv0_unpack = indexformatv0.unpack
216 216
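# --- Editor's illustration (not part of this changeset): a v0 entry is
# 76 bytes (four 4-byte ints plus three 20-byte nodeids) and round-trips
# through the struct unchanged; the node values here are made up.
entry = (0, 12, 0, 0, b'\x11' * 20, b'\x22' * 20, b'\x33' * 20)
assert indexformatv0.size == 76
assert indexformatv0_unpack(indexformatv0_pack(*entry)) == entry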
217 217
218 218 class revlogoldindex(list):
219 219 @property
220 220 def nodemap(self):
221 221 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 222 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 223 return self._nodemap
224 224
225 225 @util.propertycache
226 226 def _nodemap(self):
227 227 nodemap = nodemaputil.NodeMap({nullid: nullrev})
228 228 for r in range(0, len(self)):
229 229 n = self[r][7]
230 230 nodemap[n] = r
231 231 return nodemap
232 232
233 233 def has_node(self, node):
234 234 """return True if the node exist in the index"""
235 235 return node in self._nodemap
236 236
237 237 def rev(self, node):
238 238 """return a revision for a node
239 239
240 240 If the node is unknown, raise a RevlogError"""
241 241 return self._nodemap[node]
242 242
243 243 def get_rev(self, node):
244 244 """return a revision for a node
245 245
246 246 If the node is unknown, return None"""
247 247 return self._nodemap.get(node)
248 248
249 249 def append(self, tup):
250 250 self._nodemap[tup[7]] = len(self)
251 251 super(revlogoldindex, self).append(tup)
252 252
253 253 def __delitem__(self, i):
254 254 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 255 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 256 for r in pycompat.xrange(i.start, len(self)):
257 257 del self._nodemap[self[r][7]]
258 258 super(revlogoldindex, self).__delitem__(i)
259 259
260 260 def clearcaches(self):
261 261 self.__dict__.pop('_nodemap', None)
262 262
263 263 def __getitem__(self, i):
264 264 if i == -1:
265 265 return (0, 0, 0, -1, -1, -1, -1, nullid)
266 266 return list.__getitem__(self, i)
267 267
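# --- Editor's illustration (not part of this changeset): index -1 maps
# to the null revision's synthetic entry, so parent lookups that land on
# nullrev need no special-casing by callers.
idx = revlogoldindex([])
assert idx[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)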
268 268
269 269 class revlogoldio(object):
270 270 def __init__(self):
271 271 self.size = indexformatv0.size
272 272
273 273 def parseindex(self, data, inline):
274 274 s = self.size
275 275 index = []
276 276 nodemap = nodemaputil.NodeMap({nullid: nullrev})
277 277 n = off = 0
278 278 l = len(data)
279 279 while off + s <= l:
280 280 cur = data[off : off + s]
281 281 off += s
282 282 e = indexformatv0_unpack(cur)
283 283 # transform to revlogv1 format
284 284 e2 = (
285 285 offset_type(e[0], 0),
286 286 e[1],
287 287 -1,
288 288 e[2],
289 289 e[3],
290 290 nodemap.get(e[4], nullrev),
291 291 nodemap.get(e[5], nullrev),
292 292 e[6],
293 293 )
294 294 index.append(e2)
295 295 nodemap[e[6]] = n
296 296 n += 1
297 297
298 298 index = revlogoldindex(index)
299 299 return index, None
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 if gettype(entry[0]):
303 303 raise error.RevlogError(
304 304 _(b'index entry flags need revlog version 1')
305 305 )
306 306 e2 = (
307 307 getoffset(entry[0]),
308 308 entry[1],
309 309 entry[3],
310 310 entry[4],
311 311 node(entry[5]),
312 312 node(entry[6]),
313 313 entry[7],
314 314 )
315 315 return indexformatv0_pack(*e2)
316 316
317 317
318 318 # index ng:
319 319 # 6 bytes: offset
320 320 # 2 bytes: flags
321 321 # 4 bytes: compressed length
322 322 # 4 bytes: uncompressed length
323 323 # 4 bytes: base rev
324 324 # 4 bytes: link rev
325 325 # 4 bytes: parent 1 rev
326 326 # 4 bytes: parent 2 rev
327 327 # 32 bytes: nodeid
328 328 indexformatng = struct.Struct(b">Qiiiiii20s12x")
329 329 indexformatng_pack = indexformatng.pack
330 330 versionformat = struct.Struct(b">I")
331 331 versionformat_pack = versionformat.pack
332 332 versionformat_unpack = versionformat.unpack
333 333
334 334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
335 335 # signed integer)
336 336 _maxentrysize = 0x7FFFFFFF
337 337
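# --- Editor's illustration (not part of this changeset): an "ng" entry
# is 64 bytes: 8 (offset + flags) + 6 * 4 (ints) + 20 (nodeid) + 12 pad.
assert indexformatng.size == 64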
338 338
339 339 class revlogio(object):
340 340 def __init__(self):
341 341 self.size = indexformatng.size
342 342
343 343 def parseindex(self, data, inline):
344 344 # call the C implementation to parse the index data
345 345 index, cache = parsers.parse_index2(data, inline)
346 346 return index, cache
347 347
348 348 def packentry(self, entry, node, version, rev):
349 349 p = indexformatng_pack(*entry)
350 350 if rev == 0:
351 351 p = versionformat_pack(version) + p[4:]
352 352 return p
353 353
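# --- Editor's illustration (not part of this changeset): rev 0's entry
# doubles as the file header. Its offset bits are all zero, so the first
# four bytes carry the version stamp instead; entry values are made up.
io = revlogio()
entry = (0, 10, 10, 0, 0, -1, -1, b'\x11' * 20)
packed = io.packentry(entry, None, REVLOGV1, 0)
assert packed[:4] == versionformat_pack(REVLOGV1)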
354 354
355 355 NodemapRevlogIO = None
356 356
357 357 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
358 358
359 359 class NodemapRevlogIO(revlogio):
360 360 """A debug oriented IO class that return a PersistentNodeMapIndexObject
361 361
362 362 The PersistentNodeMapIndexObject is meant to test the persistent nodemap feature.
363 363 """
364 364
365 365 def parseindex(self, data, inline):
366 366 index, cache = parsers.parse_index_devel_nodemap(data, inline)
367 367 return index, cache
368 368
369 369
370 370 class rustrevlogio(revlogio):
371 371 def parseindex(self, data, inline):
372 372 index, cache = super(rustrevlogio, self).parseindex(data, inline)
373 373 return rustrevlog.MixedIndex(index), cache
374 374
375 375
376 376 class revlog(object):
377 377 """
378 378 the underlying revision storage object
379 379
380 380 A revlog consists of two parts, an index and the revision data.
381 381
382 382 The index is a file with a fixed record size containing
383 383 information on each revision, including its nodeid (hash), the
384 384 nodeids of its parents, the position and offset of its data within
385 385 the data file, and the revision it's based on. Finally, each entry
386 386 contains a linkrev entry that can serve as a pointer to external
387 387 data.
388 388
389 389 The revision data itself is a linear collection of data chunks.
390 390 Each chunk represents a revision and is usually represented as a
391 391 delta against the previous chunk. To bound lookup time, runs of
392 392 deltas are limited to about 2 times the length of the original
393 393 version data. This makes retrieval of a version proportional to
394 394 its size, or O(1) relative to the number of revisions.
395 395
396 396 Both pieces of the revlog are written to in an append-only
397 397 fashion, which means we never need to rewrite a file to insert or
398 398 remove data, and can use some simple techniques to avoid the need
399 399 for locking while reading.
400 400
401 401 If checkambig, indexfile is opened with checkambig=True at
402 402 writing, to avoid file stat ambiguity.
403 403
404 404 If mmaplargeindex is True, and an mmapindexthreshold is set, the
405 405 index will be mmapped rather than read if it is larger than the
406 406 configured threshold.
407 407
408 408 If censorable is True, the revlog can have censored revisions.
409 409
410 410 If `upperboundcomp` is not None, this is the expected maximal gain from
411 411 compression for the data content.
412 412 """
413 413
414 414 _flagserrorclass = error.RevlogError
415 415
416 416 def __init__(
417 417 self,
418 418 opener,
419 419 indexfile,
420 420 datafile=None,
421 421 checkambig=False,
422 422 mmaplargeindex=False,
423 423 censorable=False,
424 424 upperboundcomp=None,
425 425 persistentnodemap=False,
426 426 ):
427 427 """
428 428 create a revlog object
429 429
430 430 opener is a function that abstracts the file opening operation
431 431 and can be used to implement COW semantics or the like.
432 432
433 433 """
434 434 self.upperboundcomp = upperboundcomp
435 435 self.indexfile = indexfile
436 436 self.datafile = datafile or (indexfile[:-2] + b".d")
437 437 self.nodemap_file = None
438 438 if persistentnodemap:
439 439 self.nodemap_file = indexfile[:-2] + b".n"
440 440
441 441 self.opener = opener
442 442 # When True, indexfile is opened with checkambig=True at writing, to
443 443 # avoid file stat ambiguity.
444 444 self._checkambig = checkambig
445 445 self._mmaplargeindex = mmaplargeindex
446 446 self._censorable = censorable
447 447 # 3-tuple of (node, rev, text) for a raw revision.
448 448 self._revisioncache = None
449 449 # Maps rev to chain base rev.
450 450 self._chainbasecache = util.lrucachedict(100)
451 451 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
452 452 self._chunkcache = (0, b'')
453 453 # How much data to read and cache into the raw revlog data cache.
454 454 self._chunkcachesize = 65536
455 455 self._maxchainlen = None
456 456 self._deltabothparents = True
457 457 self.index = None
458 458 self._nodemap_docket = None
459 459 # Mapping of partial identifiers to full nodes.
460 460 self._pcache = {}
461 461 # Mapping of revision integer to full node.
462 462 self._compengine = b'zlib'
463 463 self._compengineopts = {}
464 464 self._maxdeltachainspan = -1
465 465 self._withsparseread = False
466 466 self._sparserevlog = False
467 467 self._srdensitythreshold = 0.50
468 468 self._srmingapsize = 262144
469 469
470 470 # Make copy of flag processors so each revlog instance can support
471 471 # custom flags.
472 472 self._flagprocessors = dict(flagutil.flagprocessors)
473 473
474 474 # 2-tuple of file handles being used for active writing.
475 475 self._writinghandles = None
476 476
477 477 self._loadindex()
478 478
479 479 def _loadindex(self):
480 480 mmapindexthreshold = None
481 481 opts = self.opener.options
482 482
483 483 if b'revlogv2' in opts:
484 484 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
485 485 elif b'revlogv1' in opts:
486 486 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
487 487 if b'generaldelta' in opts:
488 488 newversionflags |= FLAG_GENERALDELTA
489 489 elif b'revlogv0' in self.opener.options:
490 490 newversionflags = REVLOGV0
491 491 else:
492 492 newversionflags = REVLOG_DEFAULT_VERSION
493 493
494 494 if b'chunkcachesize' in opts:
495 495 self._chunkcachesize = opts[b'chunkcachesize']
496 496 if b'maxchainlen' in opts:
497 497 self._maxchainlen = opts[b'maxchainlen']
498 498 if b'deltabothparents' in opts:
499 499 self._deltabothparents = opts[b'deltabothparents']
500 500 self._lazydelta = bool(opts.get(b'lazydelta', True))
501 501 self._lazydeltabase = False
502 502 if self._lazydelta:
503 503 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
504 504 if b'compengine' in opts:
505 505 self._compengine = opts[b'compengine']
506 506 if b'zlib.level' in opts:
507 507 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
508 508 if b'zstd.level' in opts:
509 509 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
510 510 if b'maxdeltachainspan' in opts:
511 511 self._maxdeltachainspan = opts[b'maxdeltachainspan']
512 512 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
513 513 mmapindexthreshold = opts[b'mmapindexthreshold']
514 514 self.hassidedata = bool(opts.get(b'side-data', False))
515 515 if self.hassidedata:
516 516 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
517 517 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
518 518 withsparseread = bool(opts.get(b'with-sparse-read', False))
519 519 # sparse-revlog forces sparse-read
520 520 self._withsparseread = self._sparserevlog or withsparseread
521 521 if b'sparse-read-density-threshold' in opts:
522 522 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
523 523 if b'sparse-read-min-gap-size' in opts:
524 524 self._srmingapsize = opts[b'sparse-read-min-gap-size']
525 525 if opts.get(b'enableellipsis'):
526 526 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
527 527
528 528 # revlog v0 doesn't have flag processors
529 529 for flag, processor in pycompat.iteritems(
530 530 opts.get(b'flagprocessors', {})
531 531 ):
532 532 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
533 533
534 534 if self._chunkcachesize <= 0:
535 535 raise error.RevlogError(
536 536 _(b'revlog chunk cache size %r is not greater than 0')
537 537 % self._chunkcachesize
538 538 )
539 539 elif self._chunkcachesize & (self._chunkcachesize - 1):
540 540 raise error.RevlogError(
541 541 _(b'revlog chunk cache size %r is not a power of 2')
542 542 % self._chunkcachesize
543 543 )
544 544
545 545 indexdata = b''
546 546 self._initempty = True
547 547 try:
548 548 with self._indexfp() as f:
549 549 if (
550 550 mmapindexthreshold is not None
551 551 and self.opener.fstat(f).st_size >= mmapindexthreshold
552 552 ):
553 553 # TODO: should .close() to release resources without
554 554 # relying on Python GC
555 555 indexdata = util.buffer(util.mmapread(f))
556 556 else:
557 557 indexdata = f.read()
558 558 if len(indexdata) > 0:
559 559 versionflags = versionformat_unpack(indexdata[:4])[0]
560 560 self._initempty = False
561 561 else:
562 562 versionflags = newversionflags
563 563 except IOError as inst:
564 564 if inst.errno != errno.ENOENT:
565 565 raise
566 566
567 567 versionflags = newversionflags
568 568
569 569 self.version = versionflags
570 570
571 571 flags = versionflags & ~0xFFFF
572 572 fmt = versionflags & 0xFFFF
573 573
574 574 if fmt == REVLOGV0:
575 575 if flags:
576 576 raise error.RevlogError(
577 577 _(b'unknown flags (%#04x) in version %d revlog %s')
578 578 % (flags >> 16, fmt, self.indexfile)
579 579 )
580 580
581 581 self._inline = False
582 582 self._generaldelta = False
583 583
584 584 elif fmt == REVLOGV1:
585 585 if flags & ~REVLOGV1_FLAGS:
586 586 raise error.RevlogError(
587 587 _(b'unknown flags (%#04x) in version %d revlog %s')
588 588 % (flags >> 16, fmt, self.indexfile)
589 589 )
590 590
591 591 self._inline = versionflags & FLAG_INLINE_DATA
592 592 self._generaldelta = versionflags & FLAG_GENERALDELTA
593 593
594 594 elif fmt == REVLOGV2:
595 595 if flags & ~REVLOGV2_FLAGS:
596 596 raise error.RevlogError(
597 597 _(b'unknown flags (%#04x) in version %d revlog %s')
598 598 % (flags >> 16, fmt, self.indexfile)
599 599 )
600 600
601 601 self._inline = versionflags & FLAG_INLINE_DATA
602 602 # generaldelta implied by version 2 revlogs.
603 603 self._generaldelta = True
604 604
605 605 else:
606 606 raise error.RevlogError(
607 607 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
608 608 )
609 609 # sparse-revlog can't be on without general-delta (issue6056)
610 610 if not self._generaldelta:
611 611 self._sparserevlog = False
612 612
613 613 self._storedeltachains = True
614 614
615 615 devel_nodemap = (
616 616 self.nodemap_file
617 617 and opts.get(b'devel-force-nodemap', False)
618 618 and NodemapRevlogIO is not None
619 619 )
620 620
621 621 self._io = revlogio()
622 622 if self.version == REVLOGV0:
623 623 self._io = revlogoldio()
624 624 elif devel_nodemap:
625 625 self._io = NodemapRevlogIO()
626 626 elif rustrevlog is not None and self.opener.options.get(b'rust.index'):
627 627 self._io = rustrevlogio()
628 628 try:
629 629 d = self._io.parseindex(indexdata, self._inline)
630 630 index, _chunkcache = d
631 631 use_nodemap = (
632 632 not self._inline
633 633 and self.nodemap_file is not None
634 634 and util.safehasattr(index, 'update_nodemap_data')
635 635 )
636 636 if use_nodemap:
637 637 nodemap_data = nodemaputil.persisted_data(self)
638 638 if nodemap_data is not None:
639 639 self._nodemap_docket = nodemap_data[0]
640 640 index.update_nodemap_data(*nodemap_data)
641 641 except (ValueError, IndexError):
642 642 raise error.RevlogError(
643 643 _(b"index %s is corrupted") % self.indexfile
644 644 )
645 645 self.index, self._chunkcache = d
646 646 if not self._chunkcache:
647 647 self._chunkclear()
648 648 # revnum -> (chain-length, sum-delta-length)
649 649 self._chaininfocache = {}
650 650 # revlog header -> revlog compressor
651 651 self._decompressors = {}
652 652
653 653 @util.propertycache
654 654 def _compressor(self):
655 655 engine = util.compengines[self._compengine]
656 656 return engine.revlogcompressor(self._compengineopts)
657 657
658 658 def _indexfp(self, mode=b'r'):
659 659 """file object for the revlog's index file"""
660 660 args = {'mode': mode}
661 661 if mode != b'r':
662 662 args['checkambig'] = self._checkambig
663 663 if mode == b'w':
664 664 args['atomictemp'] = True
665 665 return self.opener(self.indexfile, **args)
666 666
667 667 def _datafp(self, mode=b'r'):
668 668 """file object for the revlog's data file"""
669 669 return self.opener(self.datafile, mode=mode)
670 670
671 671 @contextlib.contextmanager
672 672 def _datareadfp(self, existingfp=None):
673 673 """file object suitable to read data"""
674 674 # Use explicit file handle, if given.
675 675 if existingfp is not None:
676 676 yield existingfp
677 677
678 678 # Use a file handle being actively used for writes, if available.
679 679 # There is some danger to doing this because reads will seek the
680 680 # file. However, _writeentry() performs a SEEK_END before all writes,
681 681 # so we should be safe.
682 682 elif self._writinghandles:
683 683 if self._inline:
684 684 yield self._writinghandles[0]
685 685 else:
686 686 yield self._writinghandles[1]
687 687
688 688 # Otherwise open a new file handle.
689 689 else:
690 690 if self._inline:
691 691 func = self._indexfp
692 692 else:
693 693 func = self._datafp
694 694 with func() as fp:
695 695 yield fp
696 696
697 697 def tiprev(self):
698 698 return len(self.index) - 1
699 699
700 700 def tip(self):
701 701 return self.node(self.tiprev())
702 702
703 703 def __contains__(self, rev):
704 704 return 0 <= rev < len(self)
705 705
706 706 def __len__(self):
707 707 return len(self.index)
708 708
709 709 def __iter__(self):
710 710 return iter(pycompat.xrange(len(self)))
711 711
712 712 def revs(self, start=0, stop=None):
713 713 """iterate over all rev in this revlog (from start to stop)"""
714 714 return storageutil.iterrevs(len(self), start=start, stop=stop)
715 715
716 716 @property
717 717 def nodemap(self):
718 718 msg = (
719 719 b"revlog.nodemap is deprecated, "
720 720 b"use revlog.index.[has_node|rev|get_rev]"
721 721 )
722 722 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
723 723 return self.index.nodemap
724 724
725 725 @property
726 726 def _nodecache(self):
727 727 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
728 728 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
729 729 return self.index.nodemap
730 730
731 731 def hasnode(self, node):
732 732 try:
733 733 self.rev(node)
734 734 return True
735 735 except KeyError:
736 736 return False
737 737
738 738 def candelta(self, baserev, rev):
739 739 """whether two revisions (baserev, rev) can be delta-ed or not"""
740 740 # Disable delta if either rev requires a content-changing flag
741 741 # processor (ex. LFS). This is because such flag processor can alter
742 742 # the rawtext content that the delta will be based on, and two clients
743 743 # could have the same revlog node with different flags (i.e. different
744 744 # rawtext contents) and the delta could be incompatible.
745 745 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
746 746 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
747 747 ):
748 748 return False
749 749 return True
750 750
751 751 def update_caches(self, transaction):
752 752 if self.nodemap_file is not None:
753 753 if transaction is None:
754 754 nodemaputil.update_persistent_nodemap(self)
755 755 else:
756 756 nodemaputil.setup_persistent_nodemap(transaction, self)
757 757
758 758 def clearcaches(self):
759 759 self._revisioncache = None
760 760 self._chainbasecache.clear()
761 761 self._chunkcache = (0, b'')
762 762 self._pcache = {}
763 763 self.index.clearcaches()
764 764
765 765 def rev(self, node):
766 766 try:
767 767 return self.index.rev(node)
768 768 except TypeError:
769 769 raise
770 770 except error.RevlogError:
771 771 # parsers.c radix tree lookup failed
772 772 if node == wdirid or node in wdirfilenodeids:
773 773 raise error.WdirUnsupported
774 774 raise error.LookupError(node, self.indexfile, _(b'no node'))
775 775
776 776 # Accessors for index entries.
777 777
778 778 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
779 779 # are flags.
780 780 def start(self, rev):
781 781 return int(self.index[rev][0] >> 16)
782 782
783 783 def flags(self, rev):
784 784 return self.index[rev][0] & 0xFFFF
785 785
786 786 def length(self, rev):
787 787 return self.index[rev][1]
788 788
789 789 def rawsize(self, rev):
790 790 """return the length of the uncompressed text for a given revision"""
791 791 l = self.index[rev][2]
792 792 if l >= 0:
793 793 return l
794 794
795 795 t = self.rawdata(rev)
796 796 return len(t)
797 797
798 798 def size(self, rev):
799 799 """length of non-raw text (processed by a "read" flag processor)"""
800 800 # fast path: if no "read" flag processor could change the content,
801 801 # size is rawsize. note: ELLIPSIS is known to not change the content.
802 802 flags = self.flags(rev)
803 803 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
804 804 return self.rawsize(rev)
805 805
806 806 return len(self.revision(rev, raw=False))
807 807
808 808 def chainbase(self, rev):
809 809 base = self._chainbasecache.get(rev)
810 810 if base is not None:
811 811 return base
812 812
813 813 index = self.index
814 814 iterrev = rev
815 815 base = index[iterrev][3]
816 816 while base != iterrev:
817 817 iterrev = base
818 818 base = index[iterrev][3]
819 819
820 820 self._chainbasecache[rev] = base
821 821 return base
822 822
823 823 def linkrev(self, rev):
824 824 return self.index[rev][4]
825 825
826 826 def parentrevs(self, rev):
827 827 try:
828 828 entry = self.index[rev]
829 829 except IndexError:
830 830 if rev == wdirrev:
831 831 raise error.WdirUnsupported
832 832 raise
833 833
834 834 return entry[5], entry[6]
835 835
836 836 # fast parentrevs(rev) where rev isn't filtered
837 837 _uncheckedparentrevs = parentrevs
838 838
839 839 def node(self, rev):
840 840 try:
841 841 return self.index[rev][7]
842 842 except IndexError:
843 843 if rev == wdirrev:
844 844 raise error.WdirUnsupported
845 845 raise
846 846
847 847 # Derived from index values.
848 848
849 849 def end(self, rev):
850 850 return self.start(rev) + self.length(rev)
851 851
852 852 def parents(self, node):
853 853 i = self.index
854 854 d = i[self.rev(node)]
855 855 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
856 856
857 857 def chainlen(self, rev):
858 858 return self._chaininfo(rev)[0]
859 859
860 860 def _chaininfo(self, rev):
861 861 chaininfocache = self._chaininfocache
862 862 if rev in chaininfocache:
863 863 return chaininfocache[rev]
864 864 index = self.index
865 865 generaldelta = self._generaldelta
866 866 iterrev = rev
867 867 e = index[iterrev]
868 868 clen = 0
869 869 compresseddeltalen = 0
870 870 while iterrev != e[3]:
871 871 clen += 1
872 872 compresseddeltalen += e[1]
873 873 if generaldelta:
874 874 iterrev = e[3]
875 875 else:
876 876 iterrev -= 1
877 877 if iterrev in chaininfocache:
878 878 t = chaininfocache[iterrev]
879 879 clen += t[0]
880 880 compresseddeltalen += t[1]
881 881 break
882 882 e = index[iterrev]
883 883 else:
884 884 # Add text length of base since decompressing that also takes
885 885 # work. For cache hits the length is already included.
886 886 compresseddeltalen += e[1]
887 887 r = (clen, compresseddeltalen)
888 888 chaininfocache[rev] = r
889 889 return r
890 890
891 891 def _deltachain(self, rev, stoprev=None):
892 892 """Obtain the delta chain for a revision.
893 893
894 894 ``stoprev`` specifies a revision to stop at. If not specified, we
895 895 stop at the base of the chain.
896 896
897 897 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
898 898 revs in ascending order and ``stopped`` is a bool indicating whether
899 899 ``stoprev`` was hit.
900 900 """
901 901 # Try C implementation.
902 902 try:
903 903 return self.index.deltachain(rev, stoprev, self._generaldelta)
904 904 except AttributeError:
905 905 pass
906 906
907 907 chain = []
908 908
909 909 # Alias to prevent attribute lookup in tight loop.
910 910 index = self.index
911 911 generaldelta = self._generaldelta
912 912
913 913 iterrev = rev
914 914 e = index[iterrev]
915 915 while iterrev != e[3] and iterrev != stoprev:
916 916 chain.append(iterrev)
917 917 if generaldelta:
918 918 iterrev = e[3]
919 919 else:
920 920 iterrev -= 1
921 921 e = index[iterrev]
922 922
923 923 if iterrev == stoprev:
924 924 stopped = True
925 925 else:
926 926 chain.append(iterrev)
927 927 stopped = False
928 928
929 929 chain.reverse()
930 930 return chain, stopped
931 931
932 932 def ancestors(self, revs, stoprev=0, inclusive=False):
933 933 """Generate the ancestors of 'revs' in reverse revision order.
934 934 Does not generate revs lower than stoprev.
935 935
936 936 See the documentation for ancestor.lazyancestors for more details."""
937 937
938 938 # first, make sure start revisions aren't filtered
939 939 revs = list(revs)
940 940 checkrev = self.node
941 941 for r in revs:
942 942 checkrev(r)
943 943 # and we're sure ancestors aren't filtered as well
944 944
945 945 if rustancestor is not None:
946 946 lazyancestors = rustancestor.LazyAncestors
947 947 arg = self.index
948 elif util.safehasattr(parsers, b'rustlazyancestors'):
949 lazyancestors = ancestor.rustlazyancestors
950 arg = self.index
951 948 else:
952 949 lazyancestors = ancestor.lazyancestors
953 950 arg = self._uncheckedparentrevs
954 951 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
955 952
956 953 def descendants(self, revs):
957 954 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
958 955
959 956 def findcommonmissing(self, common=None, heads=None):
960 957 """Return a tuple of the ancestors of common and the ancestors of heads
961 958 that are not ancestors of common. In revset terminology, we return the
962 959 tuple:
963 960
964 961 ::common, (::heads) - (::common)
965 962
966 963 The list is sorted by revision number, meaning it is
967 964 topologically sorted.
968 965
969 966 'heads' and 'common' are both lists of node IDs. If heads is
970 967 not supplied, uses all of the revlog's heads. If common is not
971 968 supplied, uses nullid."""
972 969 if common is None:
973 970 common = [nullid]
974 971 if heads is None:
975 972 heads = self.heads()
976 973
977 974 common = [self.rev(n) for n in common]
978 975 heads = [self.rev(n) for n in heads]
979 976
980 977 # we want the ancestors, but inclusive
981 978 class lazyset(object):
982 979 def __init__(self, lazyvalues):
983 980 self.addedvalues = set()
984 981 self.lazyvalues = lazyvalues
985 982
986 983 def __contains__(self, value):
987 984 return value in self.addedvalues or value in self.lazyvalues
988 985
989 986 def __iter__(self):
990 987 added = self.addedvalues
991 988 for r in added:
992 989 yield r
993 990 for r in self.lazyvalues:
994 991 if not r in added:
995 992 yield r
996 993
997 994 def add(self, value):
998 995 self.addedvalues.add(value)
999 996
1000 997 def update(self, values):
1001 998 self.addedvalues.update(values)
1002 999
1003 1000 has = lazyset(self.ancestors(common))
1004 1001 has.add(nullrev)
1005 1002 has.update(common)
1006 1003
1007 1004 # take all ancestors from heads that aren't in has
1008 1005 missing = set()
1009 1006 visit = collections.deque(r for r in heads if r not in has)
1010 1007 while visit:
1011 1008 r = visit.popleft()
1012 1009 if r in missing:
1013 1010 continue
1014 1011 else:
1015 1012 missing.add(r)
1016 1013 for p in self.parentrevs(r):
1017 1014 if p not in has:
1018 1015 visit.append(p)
1019 1016 missing = list(missing)
1020 1017 missing.sort()
1021 1018 return has, [self.node(miss) for miss in missing]
1022 1019
1023 1020 def incrementalmissingrevs(self, common=None):
1024 1021 """Return an object that can be used to incrementally compute the
1025 1022 revision numbers of the ancestors of arbitrary sets that are not
1026 1023 ancestors of common. This is an ancestor.incrementalmissingancestors
1027 1024 object.
1028 1025
1029 1026 'common' is a list of revision numbers. If common is not supplied, uses
1030 1027 nullrev.
1031 1028 """
1032 1029 if common is None:
1033 1030 common = [nullrev]
1034 1031
1035 1032 if rustancestor is not None:
1036 1033 return rustancestor.MissingAncestors(self.index, common)
1037 1034 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1038 1035
1039 1036 def findmissingrevs(self, common=None, heads=None):
1040 1037 """Return the revision numbers of the ancestors of heads that
1041 1038 are not ancestors of common.
1042 1039
1043 1040 More specifically, return a list of revision numbers corresponding to
1044 1041 nodes N such that every N satisfies the following constraints:
1045 1042
1046 1043 1. N is an ancestor of some node in 'heads'
1047 1044 2. N is not an ancestor of any node in 'common'
1048 1045
1049 1046 The list is sorted by revision number, meaning it is
1050 1047 topologically sorted.
1051 1048
1052 1049 'heads' and 'common' are both lists of revision numbers. If heads is
1053 1050 not supplied, uses all of the revlog's heads. If common is not
1054 1051 supplied, uses nullrev."""
1055 1052 if common is None:
1056 1053 common = [nullrev]
1057 1054 if heads is None:
1058 1055 heads = self.headrevs()
1059 1056
1060 1057 inc = self.incrementalmissingrevs(common=common)
1061 1058 return inc.missingancestors(heads)
1062 1059
1063 1060 def findmissing(self, common=None, heads=None):
1064 1061 """Return the ancestors of heads that are not ancestors of common.
1065 1062
1066 1063 More specifically, return a list of nodes N such that every N
1067 1064 satisfies the following constraints:
1068 1065
1069 1066 1. N is an ancestor of some node in 'heads'
1070 1067 2. N is not an ancestor of any node in 'common'
1071 1068
1072 1069 The list is sorted by revision number, meaning it is
1073 1070 topologically sorted.
1074 1071
1075 1072 'heads' and 'common' are both lists of node IDs. If heads is
1076 1073 not supplied, uses all of the revlog's heads. If common is not
1077 1074 supplied, uses nullid."""
1078 1075 if common is None:
1079 1076 common = [nullid]
1080 1077 if heads is None:
1081 1078 heads = self.heads()
1082 1079
1083 1080 common = [self.rev(n) for n in common]
1084 1081 heads = [self.rev(n) for n in heads]
1085 1082
1086 1083 inc = self.incrementalmissingrevs(common=common)
1087 1084 return [self.node(r) for r in inc.missingancestors(heads)]
1088 1085
1089 1086 def nodesbetween(self, roots=None, heads=None):
1090 1087 """Return a topological path from 'roots' to 'heads'.
1091 1088
1092 1089 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1093 1090 topologically sorted list of all nodes N that satisfy both of
1094 1091 these constraints:
1095 1092
1096 1093 1. N is a descendant of some node in 'roots'
1097 1094 2. N is an ancestor of some node in 'heads'
1098 1095
1099 1096 Every node is considered to be both a descendant and an ancestor
1100 1097 of itself, so every reachable node in 'roots' and 'heads' will be
1101 1098 included in 'nodes'.
1102 1099
1103 1100 'outroots' is the list of reachable nodes in 'roots', i.e., the
1104 1101 subset of 'roots' that is returned in 'nodes'. Likewise,
1105 1102 'outheads' is the subset of 'heads' that is also in 'nodes'.
1106 1103
1107 1104 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1108 1105 unspecified, uses nullid as the only root. If 'heads' is
1109 1106 unspecified, uses list of all of the revlog's heads."""
1110 1107 nonodes = ([], [], [])
1111 1108 if roots is not None:
1112 1109 roots = list(roots)
1113 1110 if not roots:
1114 1111 return nonodes
1115 1112 lowestrev = min([self.rev(n) for n in roots])
1116 1113 else:
1117 1114 roots = [nullid] # Everybody's a descendant of nullid
1118 1115 lowestrev = nullrev
1119 1116 if (lowestrev == nullrev) and (heads is None):
1120 1117 # We want _all_ the nodes!
1121 1118 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1122 1119 if heads is None:
1123 1120 # All nodes are ancestors, so the latest ancestor is the last
1124 1121 # node.
1125 1122 highestrev = len(self) - 1
1126 1123 # Set ancestors to None to signal that every node is an ancestor.
1127 1124 ancestors = None
1128 1125 # Set heads to an empty dictionary for later discovery of heads
1129 1126 heads = {}
1130 1127 else:
1131 1128 heads = list(heads)
1132 1129 if not heads:
1133 1130 return nonodes
1134 1131 ancestors = set()
1135 1132 # Turn heads into a dictionary so we can remove 'fake' heads.
1136 1133 # Also, later we will be using it to filter out the heads we can't
1137 1134 # find from roots.
1138 1135 heads = dict.fromkeys(heads, False)
1139 1136 # Start at the top and keep marking parents until we're done.
1140 1137 nodestotag = set(heads)
1141 1138 # Remember where the top was so we can use it as a limit later.
1142 1139 highestrev = max([self.rev(n) for n in nodestotag])
1143 1140 while nodestotag:
1144 1141 # grab a node to tag
1145 1142 n = nodestotag.pop()
1146 1143 # Never tag nullid
1147 1144 if n == nullid:
1148 1145 continue
1149 1146 # A node's revision number represents its place in a
1150 1147 # topologically sorted list of nodes.
1151 1148 r = self.rev(n)
1152 1149 if r >= lowestrev:
1153 1150 if n not in ancestors:
1154 1151 # If we are possibly a descendant of one of the roots
1155 1152 # and we haven't already been marked as an ancestor
1156 1153 ancestors.add(n) # Mark as ancestor
1157 1154 # Add non-nullid parents to list of nodes to tag.
1158 1155 nodestotag.update(
1159 1156 [p for p in self.parents(n) if p != nullid]
1160 1157 )
1161 1158 elif n in heads: # We've seen it before, is it a fake head?
1162 1159 # So it is, real heads should not be the ancestors of
1163 1160 # any other heads.
1164 1161 heads.pop(n)
1165 1162 if not ancestors:
1166 1163 return nonodes
1167 1164 # Now that we have our set of ancestors, we want to remove any
1168 1165 # roots that are not ancestors.
1169 1166
1170 1167 # If one of the roots was nullid, everything is included anyway.
1171 1168 if lowestrev > nullrev:
1172 1169 # But, since we weren't, let's recompute the lowest rev to not
1173 1170 # include roots that aren't ancestors.
1174 1171
1175 1172 # Filter out roots that aren't ancestors of heads
1176 1173 roots = [root for root in roots if root in ancestors]
1177 1174 # Recompute the lowest revision
1178 1175 if roots:
1179 1176 lowestrev = min([self.rev(root) for root in roots])
1180 1177 else:
1181 1178 # No more roots? Return empty list
1182 1179 return nonodes
1183 1180 else:
1184 1181 # We are descending from nullid, and don't need to care about
1185 1182 # any other roots.
1186 1183 lowestrev = nullrev
1187 1184 roots = [nullid]
1188 1185 # Transform our roots list into a set.
1189 1186 descendants = set(roots)
1190 1187 # Also, keep the original roots so we can filter out roots that aren't
1191 1188 # 'real' roots (i.e. are descended from other roots).
1192 1189 roots = descendants.copy()
1193 1190 # Our topologically sorted list of output nodes.
1194 1191 orderedout = []
1195 1192 # Don't start at nullid since we don't want nullid in our output list,
1196 1193 # and if nullid shows up in descendants, empty parents will look like
1197 1194 # they're descendants.
1198 1195 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1199 1196 n = self.node(r)
1200 1197 isdescendant = False
1201 1198 if lowestrev == nullrev: # Everybody is a descendant of nullid
1202 1199 isdescendant = True
1203 1200 elif n in descendants:
1204 1201 # n is already a descendant
1205 1202 isdescendant = True
1206 1203 # This check only needs to be done here because all the roots
1207 1204 # will start being marked as descendants before the loop.
1208 1205 if n in roots:
1209 1206 # If n was a root, check if it's a 'real' root.
1210 1207 p = tuple(self.parents(n))
1211 1208 # If any of its parents are descendants, it's not a root.
1212 1209 if (p[0] in descendants) or (p[1] in descendants):
1213 1210 roots.remove(n)
1214 1211 else:
1215 1212 p = tuple(self.parents(n))
1216 1213 # A node is a descendant if either of its parents is a
1217 1214 # descendant. (We seeded the descendants set with the roots
1218 1215 # up there, remember?)
1219 1216 if (p[0] in descendants) or (p[1] in descendants):
1220 1217 descendants.add(n)
1221 1218 isdescendant = True
1222 1219 if isdescendant and ((ancestors is None) or (n in ancestors)):
1223 1220 # Only include nodes that are both descendants and ancestors.
1224 1221 orderedout.append(n)
1225 1222 if (ancestors is not None) and (n in heads):
1226 1223 # We're trying to figure out which heads are reachable
1227 1224 # from roots.
1228 1225 # Mark this head as having been reached
1229 1226 heads[n] = True
1230 1227 elif ancestors is None:
1231 1228 # Otherwise, we're trying to discover the heads.
1232 1229 # Assume this is a head because if it isn't, the next step
1233 1230 # will eventually remove it.
1234 1231 heads[n] = True
1235 1232 # But, obviously its parents aren't.
1236 1233 for p in self.parents(n):
1237 1234 heads.pop(p, None)
1238 1235 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1239 1236 roots = list(roots)
1240 1237 assert orderedout
1241 1238 assert roots
1242 1239 assert heads
1243 1240 return (orderedout, roots, heads)
1244 1241
1245 1242 def headrevs(self, revs=None):
1246 1243 if revs is None:
1247 1244 try:
1248 1245 return self.index.headrevs()
1249 1246 except AttributeError:
1250 1247 return self._headrevs()
1251 1248 if rustdagop is not None:
1252 1249 return rustdagop.headrevs(self.index, revs)
1253 1250 return dagop.headrevs(revs, self._uncheckedparentrevs)
1254 1251
1255 1252 def computephases(self, roots):
1256 1253 return self.index.computephasesmapsets(roots)
1257 1254
1258 1255 def _headrevs(self):
1259 1256 count = len(self)
1260 1257 if not count:
1261 1258 return [nullrev]
1262 1259 # we won't iterate over filtered revs, so nobody is a head at start
1263 1260 ishead = [0] * (count + 1)
1264 1261 index = self.index
1265 1262 for r in self:
1266 1263 ishead[r] = 1 # I may be a head
1267 1264 e = index[r]
1268 1265 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1269 1266 return [r for r, val in enumerate(ishead) if val]
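# A minimal, standalone sketch of the marking scheme used by _headrevs
# above, over a hypothetical parentrevs table (assumed toy data, not
# part of this module):
def toy_headrevs(parentrevs):
    # parentrevs[r] is the pair of parent revisions of r (-1 means null)
    ishead = [1] * len(parentrevs)  # everybody starts out as a head
    for r, (p1, p2) in enumerate(parentrevs):
        for p in (p1, p2):
            if p != -1:
                ishead[p] = 0  # my parents are not heads
    return [r for r, v in enumerate(ishead) if v]

# chain 0 <- 1 <- 2 plus a branch 1 <- 3: the heads are 2 and 3
assert toy_headrevs([(-1, -1), (0, -1), (1, -1), (1, -1)]) == [2, 3]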
1270 1267
1271 1268 def heads(self, start=None, stop=None):
1272 1269 """return the list of all nodes that have no children
1273 1270
1274 1271 if start is specified, only heads that are descendants of
1275 1272 start will be returned
1276 1273 if stop is specified, all the revs from stop will be treated
1277 1274 as if they had no children
1278 1275 """
1279 1276 if start is None and stop is None:
1280 1277 if not len(self):
1281 1278 return [nullid]
1282 1279 return [self.node(r) for r in self.headrevs()]
1283 1280
1284 1281 if start is None:
1285 1282 start = nullrev
1286 1283 else:
1287 1284 start = self.rev(start)
1288 1285
1289 1286 stoprevs = {self.rev(n) for n in stop or []}
1290 1287
1291 1288 revs = dagop.headrevssubset(
1292 1289 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1293 1290 )
1294 1291
1295 1292 return [self.node(rev) for rev in revs]
1296 1293
1297 1294 def children(self, node):
1298 1295 """find the children of a given node"""
1299 1296 c = []
1300 1297 p = self.rev(node)
1301 1298 for r in self.revs(start=p + 1):
1302 1299 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1303 1300 if prevs:
1304 1301 for pr in prevs:
1305 1302 if pr == p:
1306 1303 c.append(self.node(r))
1307 1304 elif p == nullrev:
1308 1305 c.append(self.node(r))
1309 1306 return c
1310 1307
1311 1308 def commonancestorsheads(self, a, b):
1312 1309 """calculate all the heads of the common ancestors of nodes a and b"""
1313 1310 a, b = self.rev(a), self.rev(b)
1314 1311 ancs = self._commonancestorsheads(a, b)
1315 1312 return pycompat.maplist(self.node, ancs)
1316 1313
1317 1314 def _commonancestorsheads(self, *revs):
1318 1315 """calculate all the heads of the common ancestors of revs"""
1319 1316 try:
1320 1317 ancs = self.index.commonancestorsheads(*revs)
1321 1318 except (AttributeError, OverflowError): # C implementation failed
1322 1319 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1323 1320 return ancs
1324 1321
1325 1322 def isancestor(self, a, b):
1326 1323 """return True if node a is an ancestor of node b
1327 1324
1328 1325 A revision is considered an ancestor of itself."""
1329 1326 a, b = self.rev(a), self.rev(b)
1330 1327 return self.isancestorrev(a, b)
1331 1328
1332 1329 def isancestorrev(self, a, b):
1333 1330 """return True if revision a is an ancestor of revision b
1334 1331
1335 1332 A revision is considered an ancestor of itself.
1336 1333
1337 1334 The implementation of this is trivial but the use of
1338 1335 reachableroots is not."""
1339 1336 if a == nullrev:
1340 1337 return True
1341 1338 elif a == b:
1342 1339 return True
1343 1340 elif a > b:
1344 1341 return False
1345 1342 return bool(self.reachableroots(a, [b], [a], includepath=False))
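# Hedged, standalone model of the isancestorrev() fast paths above: a
# toy DFS over a parentrevs table (assumed data) that also exploits the
# fact that revision numbers are topologically ordered, so an ancestor
# can never have a larger number than its descendant.
def toy_isancestorrev(parentrevs, a, b):
    if a == -1 or a == b:
        return True
    if a > b:
        return False
    stack, seen = [b], set()
    while stack:
        r = stack.pop()
        if r == a:
            return True
        for p in parentrevs[r]:
            if p >= a and p not in seen:  # prune revs below a
                seen.add(p)
                stack.append(p)
    return False

parents = [(-1, -1), (0, -1), (0, -1), (1, 2)]  # rev 3 merges 1 and 2
assert toy_isancestorrev(parents, 0, 3)
assert not toy_isancestorrev(parents, 1, 2)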
1346 1343
1347 1344 def reachableroots(self, minroot, heads, roots, includepath=False):
1348 1345 """return (heads(::(<roots> and <roots>::<heads>)))
1349 1346
1350 1347 If includepath is True, return (<roots>::<heads>)."""
1351 1348 try:
1352 1349 return self.index.reachableroots2(
1353 1350 minroot, heads, roots, includepath
1354 1351 )
1355 1352 except AttributeError:
1356 1353 return dagop._reachablerootspure(
1357 1354 self.parentrevs, minroot, roots, heads, includepath
1358 1355 )
1359 1356
1360 1357 def ancestor(self, a, b):
1361 1358 """calculate the "best" common ancestor of nodes a and b"""
1362 1359
1363 1360 a, b = self.rev(a), self.rev(b)
1364 1361 try:
1365 1362 ancs = self.index.ancestors(a, b)
1366 1363 except (AttributeError, OverflowError):
1367 1364 ancs = ancestor.ancestors(self.parentrevs, a, b)
1368 1365 if ancs:
1369 1366 # choose a consistent winner when there's a tie
1370 1367 return min(map(self.node, ancs))
1371 1368 return nullid
1372 1369
1373 1370 def _match(self, id):
1374 1371 if isinstance(id, int):
1375 1372 # rev
1376 1373 return self.node(id)
1377 1374 if len(id) == 20:
1378 1375 # possibly a binary node
1379 1376 # odds of a binary node being all hex in ASCII are 1 in 10**25
1380 1377 try:
1381 1378 node = id
1382 1379 self.rev(node) # quickly search the index
1383 1380 return node
1384 1381 except error.LookupError:
1385 1382 pass # may be partial hex id
1386 1383 try:
1387 1384 # str(rev)
1388 1385 rev = int(id)
1389 1386 if b"%d" % rev != id:
1390 1387 raise ValueError
1391 1388 if rev < 0:
1392 1389 rev = len(self) + rev
1393 1390 if rev < 0 or rev >= len(self):
1394 1391 raise ValueError
1395 1392 return self.node(rev)
1396 1393 except (ValueError, OverflowError):
1397 1394 pass
1398 1395 if len(id) == 40:
1399 1396 try:
1400 1397 # a full hex nodeid?
1401 1398 node = bin(id)
1402 1399 self.rev(node)
1403 1400 return node
1404 1401 except (TypeError, error.LookupError):
1405 1402 pass
1406 1403
1407 1404 def _partialmatch(self, id):
1408 1405 # we don't care about wdirfilenodeids as they should always be full hashes
1409 1406 maybewdir = wdirhex.startswith(id)
1410 1407 try:
1411 1408 partial = self.index.partialmatch(id)
1412 1409 if partial and self.hasnode(partial):
1413 1410 if maybewdir:
1414 1411 # single 'ff...' match in radix tree, ambiguous with wdir
1415 1412 raise error.RevlogError
1416 1413 return partial
1417 1414 if maybewdir:
1418 1415 # no 'ff...' match in radix tree, wdir identified
1419 1416 raise error.WdirUnsupported
1420 1417 return None
1421 1418 except error.RevlogError:
1422 1419 # parsers.c radix tree lookup gave multiple matches
1423 1420 # fast path: for unfiltered changelog, radix tree is accurate
1424 1421 if not getattr(self, 'filteredrevs', None):
1425 1422 raise error.AmbiguousPrefixLookupError(
1426 1423 id, self.indexfile, _(b'ambiguous identifier')
1427 1424 )
1428 1425 # fall through to slow path that filters hidden revisions
1429 1426 except (AttributeError, ValueError):
1430 1427 # we are pure python, or key was too short to search radix tree
1431 1428 pass
1432 1429
1433 1430 if id in self._pcache:
1434 1431 return self._pcache[id]
1435 1432
1436 1433 if len(id) <= 40:
1437 1434 try:
1438 1435 # hex(node)[:...]
1439 1436 l = len(id) // 2 # grab an even number of digits
1440 1437 prefix = bin(id[: l * 2])
1441 1438 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1442 1439 nl = [
1443 1440 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1444 1441 ]
1445 1442 if nullhex.startswith(id):
1446 1443 nl.append(nullid)
1447 1444 if len(nl) > 0:
1448 1445 if len(nl) == 1 and not maybewdir:
1449 1446 self._pcache[id] = nl[0]
1450 1447 return nl[0]
1451 1448 raise error.AmbiguousPrefixLookupError(
1452 1449 id, self.indexfile, _(b'ambiguous identifier')
1453 1450 )
1454 1451 if maybewdir:
1455 1452 raise error.WdirUnsupported
1456 1453 return None
1457 1454 except TypeError:
1458 1455 pass
1459 1456
1460 1457 def lookup(self, id):
1461 1458 """locate a node based on:
1462 1459 - revision number or str(revision number)
1463 1460 - nodeid or subset of hex nodeid
1464 1461 """
1465 1462 n = self._match(id)
1466 1463 if n is not None:
1467 1464 return n
1468 1465 n = self._partialmatch(id)
1469 1466 if n:
1470 1467 return n
1471 1468
1472 1469 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1473 1470
1474 1471 def shortest(self, node, minlength=1):
1475 1472 """Find the shortest unambiguous prefix that matches node."""
1476 1473
1477 1474 def isvalid(prefix):
1478 1475 try:
1479 1476 matchednode = self._partialmatch(prefix)
1480 1477 except error.AmbiguousPrefixLookupError:
1481 1478 return False
1482 1479 except error.WdirUnsupported:
1483 1480 # single 'ff...' match
1484 1481 return True
1485 1482 if matchednode is None:
1486 1483 raise error.LookupError(node, self.indexfile, _(b'no node'))
1487 1484 return True
1488 1485
1489 1486 def maybewdir(prefix):
1490 1487 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1491 1488
1492 1489 hexnode = hex(node)
1493 1490
1494 1491 def disambiguate(hexnode, minlength):
1495 1492 """Disambiguate against wdirid."""
1496 1493 for length in range(minlength, 41):
1497 1494 prefix = hexnode[:length]
1498 1495 if not maybewdir(prefix):
1499 1496 return prefix
1500 1497
1501 1498 if not getattr(self, 'filteredrevs', None):
1502 1499 try:
1503 1500 length = max(self.index.shortest(node), minlength)
1504 1501 return disambiguate(hexnode, length)
1505 1502 except error.RevlogError:
1506 1503 if node != wdirid:
1507 1504 raise error.LookupError(node, self.indexfile, _(b'no node'))
1508 1505 except AttributeError:
1509 1506 # Fall through to pure code
1510 1507 pass
1511 1508
1512 1509 if node == wdirid:
1513 1510 for length in range(minlength, 41):
1514 1511 prefix = hexnode[:length]
1515 1512 if isvalid(prefix):
1516 1513 return prefix
1517 1514
1518 1515 for length in range(minlength, 41):
1519 1516 prefix = hexnode[:length]
1520 1517 if isvalid(prefix):
1521 1518 return disambiguate(hexnode, length)
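# A hedged, standalone model of shortest-prefix search over a plain set
# of hex node ids (assumed toy data; the real code walks the index or
# the C radix tree and additionally disambiguates against the wdir id):
def toy_shortest(hexnodes, target, minlength=1):
    for length in range(minlength, len(target) + 1):
        prefix = target[:length]
        if sum(1 for n in hexnodes if n.startswith(prefix)) == 1:
            return prefix
    return target

nodes = {'1f0e3d', '1f4a2b', '8c91aa'}
assert toy_shortest(nodes, '1f4a2b') == '1f4'
assert toy_shortest(nodes, '8c91aa') == '8'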
1522 1519
1523 1520 def cmp(self, node, text):
1524 1521 """compare text with a given file revision
1525 1522
1526 1523 returns True if text is different from what is stored.
1527 1524 """
1528 1525 p1, p2 = self.parents(node)
1529 1526 return storageutil.hashrevisionsha1(text, p1, p2) != node
1530 1527
1531 1528 def _cachesegment(self, offset, data):
1532 1529 """Add a segment to the revlog cache.
1533 1530
1534 1531 Accepts an absolute offset and the data that is at that location.
1535 1532 """
1536 1533 o, d = self._chunkcache
1537 1534 # try to add to existing cache
1538 1535 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1539 1536 self._chunkcache = o, d + data
1540 1537 else:
1541 1538 self._chunkcache = offset, data
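# Toy model of the segment-cache update above (assumed constant):
# extend the cached window when the incoming data is contiguous with it
# and the result stays under the chunk-size cap, else start over.
_TOY_CHUNKSIZE = 1 << 16

def toy_cachesegment(cache, offset, data):
    o, d = cache
    if o + len(d) == offset and len(d) + len(data) < _TOY_CHUNKSIZE:
        return o, d + data  # contiguous: grow the cached window
    return offset, data  # otherwise replace it wholesale

assert toy_cachesegment((0, b'abcd'), 4, b'ef') == (0, b'abcdef')
assert toy_cachesegment((0, b'abcd'), 100, b'xy') == (100, b'xy')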
1542 1539
1543 1540 def _readsegment(self, offset, length, df=None):
1544 1541 """Load a segment of raw data from the revlog.
1545 1542
1546 1543 Accepts an absolute offset, length to read, and an optional existing
1547 1544 file handle to read from.
1548 1545
1549 1546 If an existing file handle is passed, it will be seeked and the
1550 1547 original seek position will NOT be restored.
1551 1548
1552 1549 Returns a str or buffer of raw byte data.
1553 1550
1554 1551 Raises if the requested number of bytes could not be read.
1555 1552 """
1556 1553 # Cache data both forward and backward around the requested
1557 1554 # data, in a fixed size window. This helps speed up operations
1558 1555 # involving reading the revlog backwards.
1559 1556 cachesize = self._chunkcachesize
1560 1557 realoffset = offset & ~(cachesize - 1)
1561 1558 reallength = (
1562 1559 (offset + length + cachesize) & ~(cachesize - 1)
1563 1560 ) - realoffset
1564 1561 with self._datareadfp(df) as df:
1565 1562 df.seek(realoffset)
1566 1563 d = df.read(reallength)
1567 1564
1568 1565 self._cachesegment(realoffset, d)
1569 1566 if offset != realoffset or reallength != length:
1570 1567 startoffset = offset - realoffset
1571 1568 if len(d) - startoffset < length:
1572 1569 raise error.RevlogError(
1573 1570 _(
1574 1571 b'partial read of revlog %s; expected %d bytes from '
1575 1572 b'offset %d, got %d'
1576 1573 )
1577 1574 % (
1578 1575 self.indexfile if self._inline else self.datafile,
1579 1576 length,
1580 1577 realoffset,
1581 1578 len(d) - startoffset,
1582 1579 )
1583 1580 )
1584 1581
1585 1582 return util.buffer(d, startoffset, length)
1586 1583
1587 1584 if len(d) < length:
1588 1585 raise error.RevlogError(
1589 1586 _(
1590 1587 b'partial read of revlog %s; expected %d bytes from offset '
1591 1588 b'%d, got %d'
1592 1589 )
1593 1590 % (
1594 1591 self.indexfile if self._inline else self.datafile,
1595 1592 length,
1596 1593 offset,
1597 1594 len(d),
1598 1595 )
1599 1596 )
1600 1597
1601 1598 return d
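# Hypothetical standalone illustration of the window arithmetic above,
# assuming the chunk cache size is a power of two (which the revlog
# configuration requires):
def aligned_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)  # round down to the window
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset  # round up, padding one window past the request
    return realoffset, reallength

# a 10-byte read at offset 70000 is widened to one aligned 64k window
assert aligned_window(70000, 10) == (65536, 65536)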
1602 1599
1603 1600 def _getsegment(self, offset, length, df=None):
1604 1601 """Obtain a segment of raw data from the revlog.
1605 1602
1606 1603 Accepts an absolute offset, length of bytes to obtain, and an
1607 1604 optional file handle to the already-opened revlog. If the file
1608 1605 handle is used, its original seek position will not be preserved.
1609 1606
1610 1607 Requests for data may be returned from a cache.
1611 1608
1612 1609 Returns a str or a buffer instance of raw byte data.
1613 1610 """
1614 1611 o, d = self._chunkcache
1615 1612 l = len(d)
1616 1613
1617 1614 # is it in the cache?
1618 1615 cachestart = offset - o
1619 1616 cacheend = cachestart + length
1620 1617 if cachestart >= 0 and cacheend <= l:
1621 1618 if cachestart == 0 and cacheend == l:
1622 1619 return d # avoid a copy
1623 1620 return util.buffer(d, cachestart, cacheend - cachestart)
1624 1621
1625 1622 return self._readsegment(offset, length, df=df)
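# Toy sketch of the cache-hit test above: the cache is a single
# (absolute offset, bytes) window, so a request hits only when it falls
# entirely inside that window (assumed toy data):
def toy_cached_read(chunkcache, offset, length):
    o, d = chunkcache
    cachestart = offset - o
    cacheend = cachestart + length
    if cachestart >= 0 and cacheend <= len(d):
        return d[cachestart:cacheend]  # served from the cached window
    return None  # caller would fall back to _readsegment()

cache = (100, b'abcdefghij')  # bytes 100..109 are cached
assert toy_cached_read(cache, 103, 4) == b'defg'
assert toy_cached_read(cache, 95, 4) is None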
1626 1623
1627 1624 def _getsegmentforrevs(self, startrev, endrev, df=None):
1628 1625 """Obtain a segment of raw data corresponding to a range of revisions.
1629 1626
1630 1627 Accepts the start and end revisions and an optional already-open
1631 1628 file handle to be used for reading. If the file handle is used, its
1632 1629 seek position will not be preserved.
1633 1630
1634 1631 Requests for data may be satisfied by a cache.
1635 1632
1636 1633 Returns a 2-tuple of (offset, data) for the requested range of
1637 1634 revisions. Offset is the integer offset from the beginning of the
1638 1635 revlog and data is a str or buffer of the raw byte data.
1639 1636
1640 1637 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1641 1638 to determine where each revision's data begins and ends.
1642 1639 """
1643 1640 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1644 1641 # (functions are expensive).
1645 1642 index = self.index
1646 1643 istart = index[startrev]
1647 1644 start = int(istart[0] >> 16)
1648 1645 if startrev == endrev:
1649 1646 end = start + istart[1]
1650 1647 else:
1651 1648 iend = index[endrev]
1652 1649 end = int(iend[0] >> 16) + iend[1]
1653 1650
1654 1651 if self._inline:
1655 1652 start += (startrev + 1) * self._io.size
1656 1653 end += (endrev + 1) * self._io.size
1657 1654 length = end - start
1658 1655
1659 1656 return start, self._getsegment(start, length, df=df)
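# Sketch of the inline-offset shift above (entry size assumed 64 bytes):
# in an inline revlog the index entries and data chunks are interleaved,
# so a flat data offset must be shifted by (rev + 1) index-entry sizes.
def toy_inline_offset(dataoffset, rev, iosize=64):
    return dataoffset + (rev + 1) * iosize

assert toy_inline_offset(0, 0) == 64  # rev 0's data follows its entry
assert toy_inline_offset(100, 2) == 292  # three entries precede rev 2's data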
1660 1657
1661 1658 def _chunk(self, rev, df=None):
1662 1659 """Obtain a single decompressed chunk for a revision.
1663 1660
1664 1661 Accepts an integer revision and an optional already-open file handle
1665 1662 to be used for reading. If used, the seek position of the file will not
1666 1663 be preserved.
1667 1664
1668 1665 Returns a str holding uncompressed data for the requested revision.
1669 1666 """
1670 1667 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1671 1668
1672 1669 def _chunks(self, revs, df=None, targetsize=None):
1673 1670 """Obtain decompressed chunks for the specified revisions.
1674 1671
1675 1672 Accepts an iterable of numeric revisions that are assumed to be in
1676 1673 ascending order. Also accepts an optional already-open file handle
1677 1674 to be used for reading. If used, the seek position of the file will
1678 1675 not be preserved.
1679 1676
1680 1677 This function is similar to calling ``self._chunk()`` multiple times,
1681 1678 but is faster.
1682 1679
1683 1680 Returns a list with decompressed data for each requested revision.
1684 1681 """
1685 1682 if not revs:
1686 1683 return []
1687 1684 start = self.start
1688 1685 length = self.length
1689 1686 inline = self._inline
1690 1687 iosize = self._io.size
1691 1688 buffer = util.buffer
1692 1689
1693 1690 l = []
1694 1691 ladd = l.append
1695 1692
1696 1693 if not self._withsparseread:
1697 1694 slicedchunks = (revs,)
1698 1695 else:
1699 1696 slicedchunks = deltautil.slicechunk(
1700 1697 self, revs, targetsize=targetsize
1701 1698 )
1702 1699
1703 1700 for revschunk in slicedchunks:
1704 1701 firstrev = revschunk[0]
1705 1702 # Skip trailing revisions with empty diff
1706 1703 for lastrev in revschunk[::-1]:
1707 1704 if length(lastrev) != 0:
1708 1705 break
1709 1706
1710 1707 try:
1711 1708 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1712 1709 except OverflowError:
1713 1710 # issue4215 - we can't cache a run of chunks greater than
1714 1711 # 2G on Windows
1715 1712 return [self._chunk(rev, df=df) for rev in revschunk]
1716 1713
1717 1714 decomp = self.decompress
1718 1715 for rev in revschunk:
1719 1716 chunkstart = start(rev)
1720 1717 if inline:
1721 1718 chunkstart += (rev + 1) * iosize
1722 1719 chunklength = length(rev)
1723 1720 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1724 1721
1725 1722 return l
1726 1723
1727 1724 def _chunkclear(self):
1728 1725 """Clear the raw chunk cache."""
1729 1726 self._chunkcache = (0, b'')
1730 1727
1731 1728 def deltaparent(self, rev):
1732 1729 """return deltaparent of the given revision"""
1733 1730 base = self.index[rev][3]
1734 1731 if base == rev:
1735 1732 return nullrev
1736 1733 elif self._generaldelta:
1737 1734 return base
1738 1735 else:
1739 1736 return rev - 1
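# Minimal sketch of the delta-parent rule above: with generaldelta the
# base recorded in the index *is* the delta parent; without it, deltas
# always chain to the previous revision; base == rev marks a full text.
def toy_deltaparent(base, rev, generaldelta):
    if base == rev:
        return -1  # nullrev: stored as a full snapshot
    return base if generaldelta else rev - 1

assert toy_deltaparent(7, 7, True) == -1
assert toy_deltaparent(3, 9, True) == 3
assert toy_deltaparent(3, 9, False) == 8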
1740 1737
1741 1738 def issnapshot(self, rev):
1742 1739 """tells whether rev is a snapshot
1743 1740 """
1744 1741 if not self._sparserevlog:
1745 1742 return self.deltaparent(rev) == nullrev
1746 1743 elif util.safehasattr(self.index, b'issnapshot'):
1747 1744 # directly assign the method to cache both the test and the access
1748 1745 self.issnapshot = self.index.issnapshot
1749 1746 return self.issnapshot(rev)
1750 1747 if rev == nullrev:
1751 1748 return True
1752 1749 entry = self.index[rev]
1753 1750 base = entry[3]
1754 1751 if base == rev:
1755 1752 return True
1756 1753 if base == nullrev:
1757 1754 return True
1758 1755 p1 = entry[5]
1759 1756 p2 = entry[6]
1760 1757 if base == p1 or base == p2:
1761 1758 return False
1762 1759 return self.issnapshot(base)
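# Standalone model of the sparse-revlog snapshot rule above (assumed
# toy entries of the form (deltabase, p1, p2)): a revision is a
# snapshot when it is a full text, or when its delta base is itself a
# snapshot that is not one of its parents (an "intermediate" snapshot).
def toy_issnapshot(entries, rev):
    if rev == -1:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == -1:
        return True  # full snapshot
    if base == p1 or base == p2:
        return False  # regular delta against a parent
    return toy_issnapshot(entries, base)  # follow the snapshot chain

entries = [(0, -1, -1), (0, 0, -1), (0, 1, -1)]
assert toy_issnapshot(entries, 0)  # full text (base == rev)
assert not toy_issnapshot(entries, 1)  # plain delta against parent 0
assert toy_issnapshot(entries, 2)  # delta against non-parent snapshot 0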
1763 1760
1764 1761 def snapshotdepth(self, rev):
1765 1762 """number of snapshot in the chain before this one"""
1766 1763 if not self.issnapshot(rev):
1767 1764 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1768 1765 return len(self._deltachain(rev)[0]) - 1
1769 1766
1770 1767 def revdiff(self, rev1, rev2):
1771 1768 """return or calculate a delta between two revisions
1772 1769
1773 1770 The delta calculated is in binary form and is intended to be written to
1774 1771 revlog data directly. So this function needs raw revision data.
1775 1772 """
1776 1773 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1777 1774 return bytes(self._chunk(rev2))
1778 1775
1779 1776 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1780 1777
1781 1778 def _processflags(self, text, flags, operation, raw=False):
1782 1779 """deprecated entry point to access flag processors"""
1783 1780 msg = b'_processflag(...) use the specialized variant'
1784 1781 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1785 1782 if raw:
1786 1783 return text, flagutil.processflagsraw(self, text, flags)
1787 1784 elif operation == b'read':
1788 1785 return flagutil.processflagsread(self, text, flags)
1789 1786 else: # write operation
1790 1787 return flagutil.processflagswrite(self, text, flags)
1791 1788
1792 1789 def revision(self, nodeorrev, _df=None, raw=False):
1793 1790 """return an uncompressed revision of a given node or revision
1794 1791 number.
1795 1792
1796 1793 _df - an existing file handle to read from. (internal-only)
1797 1794 raw - an optional argument specifying if the revision data is to be
1798 1795 treated as raw data when applying flag transforms. 'raw' should be set
1799 1796 to True when generating changegroups or in debug commands.
1800 1797 """
1801 1798 if raw:
1802 1799 msg = (
1803 1800 b'revlog.revision(..., raw=True) is deprecated, '
1804 1801 b'use revlog.rawdata(...)'
1805 1802 )
1806 1803 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1807 1804 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1808 1805
1809 1806 def sidedata(self, nodeorrev, _df=None):
1810 1807 """a map of extra data related to the changeset but not part of the hash
1811 1808
1812 1809 This function currently returns a dictionary. However, a more
1813 1810 advanced mapping object will likely be used in the future for
1814 1811 more efficient/lazy code.
1815 1812 """
1816 1813 return self._revisiondata(nodeorrev, _df)[1]
1817 1814
1818 1815 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1819 1816 # deal with <nodeorrev> argument type
1820 1817 if isinstance(nodeorrev, int):
1821 1818 rev = nodeorrev
1822 1819 node = self.node(rev)
1823 1820 else:
1824 1821 node = nodeorrev
1825 1822 rev = None
1826 1823
1827 1824 # fast path the special `nullid` rev
1828 1825 if node == nullid:
1829 1826 return b"", {}
1830 1827
1831 1828 # ``rawtext`` is the text as stored inside the revlog. Might be the
1832 1829 # revision or might need to be processed to retrieve the revision.
1833 1830 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1834 1831
1835 1832 if raw and validated:
1836 1833 # if we don't want to process the raw text and that raw
1837 1834 # text is cached, we can exit early.
1838 1835 return rawtext, {}
1839 1836 if rev is None:
1840 1837 rev = self.rev(node)
1841 1838 # the revlog's flags for this revision
1842 1839 # (they usually alter its state or content)
1843 1840 flags = self.flags(rev)
1844 1841
1845 1842 if validated and flags == REVIDX_DEFAULT_FLAGS:
1846 1843 # no extra flags set, no flag processor runs, text = rawtext
1847 1844 return rawtext, {}
1848 1845
1849 1846 sidedata = {}
1850 1847 if raw:
1851 1848 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1852 1849 text = rawtext
1853 1850 else:
1854 1851 try:
1855 1852 r = flagutil.processflagsread(self, rawtext, flags)
1856 1853 except error.SidedataHashError as exc:
1857 1854 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1858 1855 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1859 1856 raise error.RevlogError(msg)
1860 1857 text, validatehash, sidedata = r
1861 1858 if validatehash:
1862 1859 self.checkhash(text, node, rev=rev)
1863 1860 if not validated:
1864 1861 self._revisioncache = (node, rev, rawtext)
1865 1862
1866 1863 return text, sidedata
1867 1864
1868 1865 def _rawtext(self, node, rev, _df=None):
1869 1866 """return the possibly unvalidated rawtext for a revision
1870 1867
1871 1868 returns (rev, rawtext, validated)
1872 1869 """
1873 1870
1874 1871 # revision in the cache (could be useful to apply delta)
1875 1872 cachedrev = None
1876 1873 # An intermediate text to apply deltas to
1877 1874 basetext = None
1878 1875
1879 1876 # Check if we have the entry in cache
1880 1877 # The cache entry looks like (node, rev, rawtext)
1881 1878 if self._revisioncache:
1882 1879 if self._revisioncache[0] == node:
1883 1880 return (rev, self._revisioncache[2], True)
1884 1881 cachedrev = self._revisioncache[1]
1885 1882
1886 1883 if rev is None:
1887 1884 rev = self.rev(node)
1888 1885
1889 1886 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1890 1887 if stopped:
1891 1888 basetext = self._revisioncache[2]
1892 1889
1893 1890 # drop cache to save memory, the caller is expected to
1894 1891 # update self._revisioncache after validating the text
1895 1892 self._revisioncache = None
1896 1893
1897 1894 targetsize = None
1898 1895 rawsize = self.index[rev][2]
1899 1896 if 0 <= rawsize:
1900 1897 targetsize = 4 * rawsize
1901 1898
1902 1899 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1903 1900 if basetext is None:
1904 1901 basetext = bytes(bins[0])
1905 1902 bins = bins[1:]
1906 1903
1907 1904 rawtext = mdiff.patches(basetext, bins)
1908 1905 del basetext # let us have a chance to free memory early
1909 1906 return (rev, rawtext, False)
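# A toy model of the delta-chain reconstruction performed above. Real
# chunks are binary mdiff patches applied by mdiff.patches(); here a
# delta is faked as a (search, replace) pair purely for illustration:
toy_chunks = {
    0: b'base text',  # full snapshot at the start of the chain
    1: (b'base', b'better'),
    2: (b'text', b'docs'),
}

def toy_patch(text, delta):
    old, new = delta
    return text.replace(old, new)

text = toy_chunks[0]
for rev in (1, 2):  # the chain a _deltachain()-like walk would yield
    text = toy_patch(text, toy_chunks[rev])
assert text == b'better docs'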
1910 1907
1911 1908 def rawdata(self, nodeorrev, _df=None):
1912 1909 """return an uncompressed raw data of a given node or revision number.
1913 1910
1914 1911 _df - an existing file handle to read from. (internal-only)
1915 1912 """
1916 1913 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1917 1914
1918 1915 def hash(self, text, p1, p2):
1919 1916 """Compute a node hash.
1920 1917
1921 1918 Available as a function so that subclasses can replace the hash
1922 1919 as needed.
1923 1920 """
1924 1921 return storageutil.hashrevisionsha1(text, p1, p2)
1925 1922
1926 1923 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1927 1924 """Check node hash integrity.
1928 1925
1929 1926 Available as a function so that subclasses can extend hash mismatch
1930 1927 behaviors as needed.
1931 1928 """
1932 1929 try:
1933 1930 if p1 is None and p2 is None:
1934 1931 p1, p2 = self.parents(node)
1935 1932 if node != self.hash(text, p1, p2):
1936 1933 # Clear the revision cache on hash failure. The revision cache
1937 1934 # only stores the raw revision and clearing the cache does have
1938 1935 # the side-effect that we won't have a cache hit when the raw
1939 1936 # revision data is accessed. But this case should be rare and
1940 1937 # it is extra work to teach the cache about the hash
1941 1938 # verification state.
1942 1939 if self._revisioncache and self._revisioncache[0] == node:
1943 1940 self._revisioncache = None
1944 1941
1945 1942 revornode = rev
1946 1943 if revornode is None:
1947 1944 revornode = templatefilters.short(hex(node))
1948 1945 raise error.RevlogError(
1949 1946 _(b"integrity check failed on %s:%s")
1950 1947 % (self.indexfile, pycompat.bytestr(revornode))
1951 1948 )
1952 1949 except error.RevlogError:
1953 1950 if self._censorable and storageutil.iscensoredtext(text):
1954 1951 raise error.CensoredNodeError(self.indexfile, node, text)
1955 1952 raise
1956 1953
1957 1954 def _enforceinlinesize(self, tr, fp=None):
1958 1955 """Check if the revlog is too big for inline and convert if so.
1959 1956
1960 1957 This should be called after revisions are added to the revlog. If the
1961 1958 revlog has grown too large to be an inline revlog, it will convert it
1962 1959 to use multiple index and data files.
1963 1960 """
1964 1961 tiprev = len(self) - 1
1965 1962 if (
1966 1963 not self._inline
1967 1964 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1968 1965 ):
1969 1966 return
1970 1967
1971 1968 trinfo = tr.find(self.indexfile)
1972 1969 if trinfo is None:
1973 1970 raise error.RevlogError(
1974 1971 _(b"%s not found in the transaction") % self.indexfile
1975 1972 )
1976 1973
1977 1974 trindex = trinfo[2]
1978 1975 if trindex is not None:
1979 1976 dataoff = self.start(trindex)
1980 1977 else:
1981 1978 # revlog was stripped at start of transaction, use all leftover data
1982 1979 trindex = len(self) - 1
1983 1980 dataoff = self.end(tiprev)
1984 1981
1985 1982 tr.add(self.datafile, dataoff)
1986 1983
1987 1984 if fp:
1988 1985 fp.flush()
1989 1986 fp.close()
1990 1987 # We can't use the cached file handle after close(). So prevent
1991 1988 # its usage.
1992 1989 self._writinghandles = None
1993 1990
1994 1991 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1995 1992 for r in self:
1996 1993 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1997 1994
1998 1995 with self._indexfp(b'w') as fp:
1999 1996 self.version &= ~FLAG_INLINE_DATA
2000 1997 self._inline = False
2001 1998 io = self._io
2002 1999 for i in self:
2003 2000 e = io.packentry(self.index[i], self.node, self.version, i)
2004 2001 fp.write(e)
2005 2002
2006 2003 # the temp file replaces the real index when we exit the context
2007 2004 # manager
2008 2005
2009 2006 tr.replace(self.indexfile, trindex * self._io.size)
2010 2007 nodemaputil.setup_persistent_nodemap(tr, self)
2011 2008 self._chunkclear()
2012 2009
2013 2010 def _nodeduplicatecallback(self, transaction, node):
2014 2011 """called when trying to add a node already stored.
2015 2012 """
2016 2013
2017 2014 def addrevision(
2018 2015 self,
2019 2016 text,
2020 2017 transaction,
2021 2018 link,
2022 2019 p1,
2023 2020 p2,
2024 2021 cachedelta=None,
2025 2022 node=None,
2026 2023 flags=REVIDX_DEFAULT_FLAGS,
2027 2024 deltacomputer=None,
2028 2025 sidedata=None,
2029 2026 ):
2030 2027 """add a revision to the log
2031 2028
2032 2029 text - the revision data to add
2033 2030 transaction - the transaction object used for rollback
2034 2031 link - the linkrev data to add
2035 2032 p1, p2 - the parent nodeids of the revision
2036 2033 cachedelta - an optional precomputed delta
2037 2034 node - nodeid of revision; typically node is not specified, and it is
2038 2035 computed by default as hash(text, p1, p2), however subclasses might
2039 2036 use different hashing method (and override checkhash() in such case)
2040 2037 flags - the known flags to set on the revision
2041 2038 deltacomputer - an optional deltacomputer instance shared between
2042 2039 multiple calls
2043 2040 """
2044 2041 if link == nullrev:
2045 2042 raise error.RevlogError(
2046 2043 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2047 2044 )
2048 2045
2049 2046 if sidedata is None:
2050 2047 sidedata = {}
2051 2048 flags = flags & ~REVIDX_SIDEDATA
2052 2049 elif not self.hassidedata:
2053 2050 raise error.ProgrammingError(
2054 2051 _(b"trying to add sidedata to a revlog who don't support them")
2055 2052 )
2056 2053 else:
2057 2054 flags |= REVIDX_SIDEDATA
2058 2055
2059 2056 if flags:
2060 2057 node = node or self.hash(text, p1, p2)
2061 2058
2062 2059 rawtext, validatehash = flagutil.processflagswrite(
2063 2060 self, text, flags, sidedata=sidedata
2064 2061 )
2065 2062
2066 2063 # If the flag processor modifies the revision data, ignore any provided
2067 2064 # cachedelta.
2068 2065 if rawtext != text:
2069 2066 cachedelta = None
2070 2067
2071 2068 if len(rawtext) > _maxentrysize:
2072 2069 raise error.RevlogError(
2073 2070 _(
2074 2071 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2075 2072 )
2076 2073 % (self.indexfile, len(rawtext))
2077 2074 )
2078 2075
2079 2076 node = node or self.hash(rawtext, p1, p2)
2080 2077 if self.index.has_node(node):
2081 2078 return node
2082 2079
2083 2080 if validatehash:
2084 2081 self.checkhash(rawtext, node, p1=p1, p2=p2)
2085 2082
2086 2083 return self.addrawrevision(
2087 2084 rawtext,
2088 2085 transaction,
2089 2086 link,
2090 2087 p1,
2091 2088 p2,
2092 2089 node,
2093 2090 flags,
2094 2091 cachedelta=cachedelta,
2095 2092 deltacomputer=deltacomputer,
2096 2093 )
2097 2094
2098 2095 def addrawrevision(
2099 2096 self,
2100 2097 rawtext,
2101 2098 transaction,
2102 2099 link,
2103 2100 p1,
2104 2101 p2,
2105 2102 node,
2106 2103 flags,
2107 2104 cachedelta=None,
2108 2105 deltacomputer=None,
2109 2106 ):
2110 2107 """add a raw revision with known flags, node and parents
2111 2108 useful when reusing a revision not stored in this revlog (e.g. received
2112 2109 over the wire, or read from an external bundle).
2113 2110 """
2114 2111 dfh = None
2115 2112 if not self._inline:
2116 2113 dfh = self._datafp(b"a+")
2117 2114 ifh = self._indexfp(b"a+")
2118 2115 try:
2119 2116 return self._addrevision(
2120 2117 node,
2121 2118 rawtext,
2122 2119 transaction,
2123 2120 link,
2124 2121 p1,
2125 2122 p2,
2126 2123 flags,
2127 2124 cachedelta,
2128 2125 ifh,
2129 2126 dfh,
2130 2127 deltacomputer=deltacomputer,
2131 2128 )
2132 2129 finally:
2133 2130 if dfh:
2134 2131 dfh.close()
2135 2132 ifh.close()
2136 2133
2137 2134 def compress(self, data):
2138 2135 """Generate a possibly-compressed representation of data."""
2139 2136 if not data:
2140 2137 return b'', data
2141 2138
2142 2139 compressed = self._compressor.compress(data)
2143 2140
2144 2141 if compressed:
2145 2142 # The revlog compressor added the header in the returned data.
2146 2143 return b'', compressed
2147 2144
2148 2145 if data[0:1] == b'\0':
2149 2146 return b'', data
2150 2147 return b'u', data
2151 2148
2152 2149 def decompress(self, data):
2153 2150 """Decompress a revlog chunk.
2154 2151
2155 2152 The chunk is expected to begin with a header identifying the
2156 2153 format type so it can be routed to an appropriate decompressor.
2157 2154 """
2158 2155 if not data:
2159 2156 return data
2160 2157
2161 2158 # Revlogs are read much more frequently than they are written and many
2162 2159 # chunks only take microseconds to decompress, so performance is
2163 2160 # important here.
2164 2161 #
2165 2162 # We can make a few assumptions about revlogs:
2166 2163 #
2167 2164 # 1) the majority of chunks will be compressed (as opposed to inline
2168 2165 # raw data).
2169 2166 # 2) decompressing *any* data will likely be at least 10x slower than
2170 2167 # returning raw inline data.
2171 2168 # 3) we want to prioritize common and officially supported compression
2172 2169 # engines
2173 2170 #
2174 2171 # It follows that we want to optimize for "decompress compressed data
2175 2172 # when encoded with common and officially supported compression engines"
2176 2173 # case over "raw data" and "data encoded by less common or non-official
2177 2174 # compression engines." That is why we have the inline lookup first
2178 2175 # followed by the compengines lookup.
2179 2176 #
2180 2177 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2181 2178 # compressed chunks. And this matters for changelog and manifest reads.
2182 2179 t = data[0:1]
2183 2180
2184 2181 if t == b'x':
2185 2182 try:
2186 2183 return _zlibdecompress(data)
2187 2184 except zlib.error as e:
2188 2185 raise error.RevlogError(
2189 2186 _(b'revlog decompress error: %s')
2190 2187 % stringutil.forcebytestr(e)
2191 2188 )
2192 2189 # '\0' is more common than 'u' so it goes first.
2193 2190 elif t == b'\0':
2194 2191 return data
2195 2192 elif t == b'u':
2196 2193 return util.buffer(data, 1)
2197 2194
2198 2195 try:
2199 2196 compressor = self._decompressors[t]
2200 2197 except KeyError:
2201 2198 try:
2202 2199 engine = util.compengines.forrevlogheader(t)
2203 2200 compressor = engine.revlogcompressor(self._compengineopts)
2204 2201 self._decompressors[t] = compressor
2205 2202 except KeyError:
2206 2203 raise error.RevlogError(_(b'unknown compression type %r') % t)
2207 2204
2208 2205 return compressor.decompress(data)
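# Hedged sketch of the chunk-header convention routed on above: b'u'
# means stored uncompressed, b'\0' means empty-or-raw data, and any
# other byte (b'x' for zlib, ...) is a header the compression engine
# itself left in place:
import zlib

def toy_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)  # zlib output keeps its own header
    if t == b'\0':
        return data
    if t == b'u':
        return data[1:]
    raise ValueError('unknown compression type %r' % t)

assert toy_decompress(b'uhello') == b'hello'
assert toy_decompress(zlib.compress(b'hello' * 100)) == b'hello' * 100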
2209 2206
2210 2207 def _addrevision(
2211 2208 self,
2212 2209 node,
2213 2210 rawtext,
2214 2211 transaction,
2215 2212 link,
2216 2213 p1,
2217 2214 p2,
2218 2215 flags,
2219 2216 cachedelta,
2220 2217 ifh,
2221 2218 dfh,
2222 2219 alwayscache=False,
2223 2220 deltacomputer=None,
2224 2221 ):
2225 2222 """internal function to add revisions to the log
2226 2223
2227 2224 see addrevision for argument descriptions.
2228 2225
2229 2226 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2230 2227
2231 2228 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2232 2229 be used.
2233 2230
2234 2231 invariants:
2235 2232 - rawtext is optional (can be None); if not set, cachedelta must be set.
2236 2233 if both are set, they must correspond to each other.
2237 2234 """
2238 2235 if node == nullid:
2239 2236 raise error.RevlogError(
2240 2237 _(b"%s: attempt to add null revision") % self.indexfile
2241 2238 )
2242 2239 if node == wdirid or node in wdirfilenodeids:
2243 2240 raise error.RevlogError(
2244 2241 _(b"%s: attempt to add wdir revision") % self.indexfile
2245 2242 )
2246 2243
2247 2244 if self._inline:
2248 2245 fh = ifh
2249 2246 else:
2250 2247 fh = dfh
2251 2248
2252 2249 btext = [rawtext]
2253 2250
2254 2251 curr = len(self)
2255 2252 prev = curr - 1
2256 2253 offset = self.end(prev)
2257 2254 p1r, p2r = self.rev(p1), self.rev(p2)
2258 2255
2259 2256 # full versions are inserted when the needed deltas
2260 2257 # become comparable to the uncompressed text
2261 2258 if rawtext is None:
2262 2259 # we need the rawtext size before it is changed by flag processors;
2263 2260 # this is the non-raw size. Use revlog explicitly to avoid filelog's
2264 2261 # extra logic that might remove the metadata size.
2265 2262 textlen = mdiff.patchedsize(
2266 2263 revlog.size(self, cachedelta[0]), cachedelta[1]
2267 2264 )
2268 2265 else:
2269 2266 textlen = len(rawtext)
2270 2267
2271 2268 if deltacomputer is None:
2272 2269 deltacomputer = deltautil.deltacomputer(self)
2273 2270
2274 2271 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2275 2272
2276 2273 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2277 2274
2278 2275 e = (
2279 2276 offset_type(offset, flags),
2280 2277 deltainfo.deltalen,
2281 2278 textlen,
2282 2279 deltainfo.base,
2283 2280 link,
2284 2281 p1r,
2285 2282 p2r,
2286 2283 node,
2287 2284 )
2288 2285 self.index.append(e)
2289 2286
2290 2287 entry = self._io.packentry(e, self.node, self.version, curr)
2291 2288 self._writeentry(
2292 2289 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2293 2290 )
2294 2291
2295 2292 rawtext = btext[0]
2296 2293
2297 2294 if alwayscache and rawtext is None:
2298 2295 rawtext = deltacomputer.buildtext(revinfo, fh)
2299 2296
2300 2297 if type(rawtext) == bytes: # only accept immutable objects
2301 2298 self._revisioncache = (node, curr, rawtext)
2302 2299 self._chainbasecache[curr] = deltainfo.chainbase
2303 2300 return node
2304 2301
2305 2302 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2306 2303 # Files opened in a+ mode have inconsistent behavior on various
2307 2304 # platforms. Windows requires that a file positioning call be made
2308 2305 # when the file handle transitions between reads and writes. See
2309 2306 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2310 2307 # platforms, Python or the platform itself can be buggy. Some versions
2311 2308 # of Solaris have been observed to not append at the end of the file
2312 2309 # if the file was seeked to before the end. See issue4943 for more.
2313 2310 #
2314 2311 # We work around this issue by inserting a seek() before writing.
2315 2312 # Note: This is likely not necessary on Python 3. However, because
2316 2313 # the file handle is reused for reads and may be seeked there, we need
2317 2314 # to be careful before changing this.
2318 2315 ifh.seek(0, os.SEEK_END)
2319 2316 if dfh:
2320 2317 dfh.seek(0, os.SEEK_END)
2321 2318
2322 2319 curr = len(self) - 1
2323 2320 if not self._inline:
2324 2321 transaction.add(self.datafile, offset)
2325 2322 transaction.add(self.indexfile, curr * len(entry))
2326 2323 if data[0]:
2327 2324 dfh.write(data[0])
2328 2325 dfh.write(data[1])
2329 2326 ifh.write(entry)
2330 2327 else:
2331 2328 offset += curr * self._io.size
2332 2329 transaction.add(self.indexfile, offset, curr)
2333 2330 ifh.write(entry)
2334 2331 ifh.write(data[0])
2335 2332 ifh.write(data[1])
2336 2333 self._enforceinlinesize(transaction, ifh)
2337 2334 nodemaputil.setup_persistent_nodemap(transaction, self)
2338 2335
2339 2336 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2340 2337 """
2341 2338 add a delta group
2342 2339
2343 2340 given a set of deltas, add them to the revision log. the
2344 2341 first delta is against its parent, which should be in our
2345 2342 log, the rest are against the previous delta.
2346 2343
2347 2344 If ``addrevisioncb`` is defined, it will be called with arguments of
2348 2345 this revlog and the node that was added.
2349 2346 """
2350 2347
2351 2348 if self._writinghandles:
2352 2349 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2353 2350
2354 2351 nodes = []
2355 2352
2356 2353 r = len(self)
2357 2354 end = 0
2358 2355 if r:
2359 2356 end = self.end(r - 1)
2360 2357 ifh = self._indexfp(b"a+")
2361 2358 isize = r * self._io.size
2362 2359 if self._inline:
2363 2360 transaction.add(self.indexfile, end + isize, r)
2364 2361 dfh = None
2365 2362 else:
2366 2363 transaction.add(self.indexfile, isize, r)
2367 2364 transaction.add(self.datafile, end)
2368 2365 dfh = self._datafp(b"a+")
2369 2366
2370 2367 def flush():
2371 2368 if dfh:
2372 2369 dfh.flush()
2373 2370 ifh.flush()
2374 2371
2375 2372 self._writinghandles = (ifh, dfh)
2376 2373
2377 2374 try:
2378 2375 deltacomputer = deltautil.deltacomputer(self)
2379 2376 # loop through our set of deltas
2380 2377 for data in deltas:
2381 2378 node, p1, p2, linknode, deltabase, delta, flags = data
2382 2379 link = linkmapper(linknode)
2383 2380 flags = flags or REVIDX_DEFAULT_FLAGS
2384 2381
2385 2382 nodes.append(node)
2386 2383
2387 2384 if self.index.has_node(node):
2388 2385 self._nodeduplicatecallback(transaction, node)
2389 2386 # this can happen if two branches make the same change
2390 2387 continue
2391 2388
2392 2389 for p in (p1, p2):
2393 2390 if not self.index.has_node(p):
2394 2391 raise error.LookupError(
2395 2392 p, self.indexfile, _(b'unknown parent')
2396 2393 )
2397 2394
2398 2395 if not self.index.has_node(deltabase):
2399 2396 raise error.LookupError(
2400 2397 deltabase, self.indexfile, _(b'unknown delta base')
2401 2398 )
2402 2399
2403 2400 baserev = self.rev(deltabase)
2404 2401
2405 2402 if baserev != nullrev and self.iscensored(baserev):
2406 2403 # if base is censored, delta must be full replacement in a
2407 2404 # single patch operation
2408 2405 hlen = struct.calcsize(b">lll")
2409 2406 oldlen = self.rawsize(baserev)
2410 2407 newlen = len(delta) - hlen
2411 2408 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2412 2409 raise error.CensoredBaseError(
2413 2410 self.indexfile, self.node(baserev)
2414 2411 )
2415 2412
2416 2413 if not flags and self._peek_iscensored(baserev, delta, flush):
2417 2414 flags |= REVIDX_ISCENSORED
2418 2415
2419 2416 # We assume consumers of addrevisioncb will want to retrieve
2420 2417 # the added revision, which will require a call to
2421 2418 # revision(). revision() will fast path if there is a cache
2422 2419 # hit. So, we tell _addrevision() to always cache in this case.
2423 2420 # We're only using addgroup() in the context of changegroup
2424 2421 # generation so the revision data can always be handled as raw
2425 2422 # by the flagprocessor.
2426 2423 self._addrevision(
2427 2424 node,
2428 2425 None,
2429 2426 transaction,
2430 2427 link,
2431 2428 p1,
2432 2429 p2,
2433 2430 flags,
2434 2431 (baserev, delta),
2435 2432 ifh,
2436 2433 dfh,
2437 2434 alwayscache=bool(addrevisioncb),
2438 2435 deltacomputer=deltacomputer,
2439 2436 )
2440 2437
2441 2438 if addrevisioncb:
2442 2439 addrevisioncb(self, node)
2443 2440
2444 2441 if not dfh and not self._inline:
2445 2442 # addrevision switched from inline to conventional
2446 2443 # reopen the index
2447 2444 ifh.close()
2448 2445 dfh = self._datafp(b"a+")
2449 2446 ifh = self._indexfp(b"a+")
2450 2447 self._writinghandles = (ifh, dfh)
2451 2448 finally:
2452 2449 self._writinghandles = None
2453 2450
2454 2451 if dfh:
2455 2452 dfh.close()
2456 2453 ifh.close()
2457 2454
2458 2455 return nodes
2459 2456
2460 2457 def iscensored(self, rev):
2461 2458 """Check if a file revision is censored."""
2462 2459 if not self._censorable:
2463 2460 return False
2464 2461
2465 2462 return self.flags(rev) & REVIDX_ISCENSORED
2466 2463
2467 2464 def _peek_iscensored(self, baserev, delta, flush):
2468 2465 """Quickly check if a delta produces a censored revision."""
2469 2466 if not self._censorable:
2470 2467 return False
2471 2468
2472 2469 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2473 2470
2474 2471 def getstrippoint(self, minlink):
2475 2472 """find the minimum rev that must be stripped to strip the linkrev
2476 2473
2477 2474 Returns a tuple containing the minimum rev and a set of all revs that
2478 2475 have linkrevs that will be broken by this strip.
2479 2476 """
2480 2477 return storageutil.resolvestripinfo(
2481 2478 minlink,
2482 2479 len(self) - 1,
2483 2480 self.headrevs(),
2484 2481 self.linkrev,
2485 2482 self.parentrevs,
2486 2483 )
2487 2484
2488 2485 def strip(self, minlink, transaction):
2489 2486 """truncate the revlog on the first revision with a linkrev >= minlink
2490 2487
2491 2488 This function is called when we're stripping revision minlink and
2492 2489 its descendants from the repository.
2493 2490
2494 2491 We have to remove all revisions with linkrev >= minlink, because
2495 2492 the equivalent changelog revisions will be renumbered after the
2496 2493 strip.
2497 2494
2498 2495 So we truncate the revlog on the first of these revisions, and
2499 2496 trust that the caller has saved the revisions that shouldn't be
2500 2497 removed and that it'll re-add them after this truncation.
2501 2498 """
2502 2499 if len(self) == 0:
2503 2500 return
2504 2501
2505 2502 rev, _ = self.getstrippoint(minlink)
2506 2503 if rev == len(self):
2507 2504 return
2508 2505
2509 2506 # first truncate the files on disk
2510 2507 end = self.start(rev)
2511 2508 if not self._inline:
2512 2509 transaction.add(self.datafile, end)
2513 2510 end = rev * self._io.size
2514 2511 else:
2515 2512 end += rev * self._io.size
2516 2513
2517 2514 transaction.add(self.indexfile, end)
2518 2515
2519 2516 # then reset internal state in memory to forget those revisions
2520 2517 self._revisioncache = None
2521 2518 self._chaininfocache = {}
2522 2519 self._chunkclear()
2523 2520
2524 2521 del self.index[rev:-1]
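# Sketch of the truncation offsets computed above (assumed entry size):
# in an inline revlog the single file interleaves index entries and
# data, so the cut point is the data start of `rev` plus `rev` entry
# sizes; with a separate index file it is simply rev * entrysize.
def toy_strippoint(datastart, rev, iosize=64, inline=True):
    if inline:
        return datastart + rev * iosize
    return rev * iosize

assert toy_strippoint(500, 3) == 692
assert toy_strippoint(500, 3, inline=False) == 192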
2525 2522
2526 2523 def checksize(self):
2527 2524 """Check size of index and data files
2528 2525
2529 2526 return a (dd, di) tuple.
2530 2527 - dd: extra bytes for the "data" file
2531 2528 - di: extra bytes for the "index" file
2532 2529
2533 2530 A healthy revlog will return (0, 0).
2534 2531 """
2535 2532 expected = 0
2536 2533 if len(self):
2537 2534 expected = max(0, self.end(len(self) - 1))
2538 2535
2539 2536 try:
2540 2537 with self._datafp() as f:
2541 2538 f.seek(0, io.SEEK_END)
2542 2539 actual = f.tell()
2543 2540 dd = actual - expected
2544 2541 except IOError as inst:
2545 2542 if inst.errno != errno.ENOENT:
2546 2543 raise
2547 2544 dd = 0
2548 2545
2549 2546 try:
2550 2547 f = self.opener(self.indexfile)
2551 2548 f.seek(0, io.SEEK_END)
2552 2549 actual = f.tell()
2553 2550 f.close()
2554 2551 s = self._io.size
2555 2552 i = max(0, actual // s)
2556 2553 di = actual - (i * s)
2557 2554 if self._inline:
2558 2555 databytes = 0
2559 2556 for r in self:
2560 2557 databytes += max(0, self.length(r))
2561 2558 dd = 0
2562 2559 di = actual - len(self) * s - databytes
2563 2560 except IOError as inst:
2564 2561 if inst.errno != errno.ENOENT:
2565 2562 raise
2566 2563 di = 0
2567 2564
2568 2565 return (dd, di)
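# Standalone sketch of the (dd, di) health check above with assumed toy
# numbers: dd/di count bytes found on disk beyond what the index says
# should exist; a healthy revlog yields (0, 0).
def toy_checksize(expected_data, actual_data, entrysize, actual_index):
    dd = actual_data - expected_data
    di = actual_index - (actual_index // entrysize) * entrysize
    return dd, di

assert toy_checksize(1000, 1000, 64, 640) == (0, 0)  # healthy
assert toy_checksize(1000, 1024, 64, 650) == (24, 10)  # trailing bytes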
2569 2566
2570 2567 def files(self):
2571 2568 res = [self.indexfile]
2572 2569 if not self._inline:
2573 2570 res.append(self.datafile)
2574 2571 return res
2575 2572
2576 2573 def emitrevisions(
2577 2574 self,
2578 2575 nodes,
2579 2576 nodesorder=None,
2580 2577 revisiondata=False,
2581 2578 assumehaveparentrevisions=False,
2582 2579 deltamode=repository.CG_DELTAMODE_STD,
2583 2580 ):
2584 2581 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2585 2582 raise error.ProgrammingError(
2586 2583 b'unhandled value for nodesorder: %s' % nodesorder
2587 2584 )
2588 2585
2589 2586 if nodesorder is None and not self._generaldelta:
2590 2587 nodesorder = b'storage'
2591 2588
2592 2589 if (
2593 2590 not self._storedeltachains
2594 2591 and deltamode != repository.CG_DELTAMODE_PREV
2595 2592 ):
2596 2593 deltamode = repository.CG_DELTAMODE_FULL
2597 2594
2598 2595 return storageutil.emitrevisions(
2599 2596 self,
2600 2597 nodes,
2601 2598 nodesorder,
2602 2599 revlogrevisiondelta,
2603 2600 deltaparentfn=self.deltaparent,
2604 2601 candeltafn=self.candelta,
2605 2602 rawsizefn=self.rawsize,
2606 2603 revdifffn=self.revdiff,
2607 2604 flagsfn=self.flags,
2608 2605 deltamode=deltamode,
2609 2606 revisiondata=revisiondata,
2610 2607 assumehaveparentrevisions=assumehaveparentrevisions,
2611 2608 )
2612 2609
2613 2610 DELTAREUSEALWAYS = b'always'
2614 2611 DELTAREUSESAMEREVS = b'samerevs'
2615 2612 DELTAREUSENEVER = b'never'
2616 2613
2617 2614 DELTAREUSEFULLADD = b'fulladd'
2618 2615
2619 2616 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2620 2617
2621 2618 def clone(
2622 2619 self,
2623 2620 tr,
2624 2621 destrevlog,
2625 2622 addrevisioncb=None,
2626 2623 deltareuse=DELTAREUSESAMEREVS,
2627 2624 forcedeltabothparents=None,
2628 2625 sidedatacompanion=None,
2629 2626 ):
2630 2627 """Copy this revlog to another, possibly with format changes.
2631 2628
2632 2629 The destination revlog will contain the same revisions and nodes.
2633 2630 However, it may not be bit-for-bit identical due to e.g. delta encoding
2634 2631 differences.
2635 2632
2637 2634 The ``deltareuse`` argument controls how deltas from the existing revlog
2637 2634 are preserved in the destination revlog. The argument can have the
2638 2635 following values:
2639 2636
2640 2637 DELTAREUSEALWAYS
2641 2638 Deltas will always be reused (if possible), even if the destination
2642 2639 revlog would not select the same revisions for the delta. This is the
2643 2640 fastest mode of operation.
2644 2641 DELTAREUSESAMEREVS
2645 2642 Deltas will be reused if the destination revlog would pick the same
2646 2643 revisions for the delta. This mode strikes a balance between speed
2647 2644 and optimization.
2648 2645 DELTAREUSENEVER
2649 2646 Deltas will never be reused. This is the slowest mode of execution.
2650 2647 This mode can be used to recompute deltas (e.g. if the diff/delta
2651 2648 algorithm changes).
2652 2649 DELTAREUSEFULLADD
2653 2650 Revisions will be re-added as if they were new content. This is
2654 2651 slower than DELTAREUSEALWAYS but allows more mechanisms to kick
2655 2652 in, e.g. large file detection and handling.
2656 2653
2657 2654 Delta computation can be slow, so the choice of delta reuse policy can
2658 2655 significantly affect run time.
2659 2656
2660 2657 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2661 2658 two extremes. Deltas will be reused if they are appropriate. But if the
2662 2659 delta could choose a better revision, it will do so. This means if you
2663 2660 are converting a non-generaldelta revlog to a generaldelta revlog,
2664 2661 deltas will be recomputed if the delta's parent isn't a parent of the
2665 2662 revision.
2666 2663
2667 2664 In addition to the delta policy, the ``forcedeltabothparents``
2668 2665 argument controls whether to force computing deltas against both
2669 2666 parents for merges. If None, the current default is kept.
2670 2667
2671 2668 If not None, `sidedatacompanion` is a callable that accepts two
2672 2669 arguments:
2673 2670
2674 2671 (srcrevlog, rev)
2675 2672
2676 2673 and returns a triplet that controls changes to sidedata content from the
2677 2674 old revision to the new clone result:
2678 2675
2679 2676 (dropall, filterout, update)
2680 2677
2681 2678 * if `dropall` is True, all sidedata should be dropped
2682 2679 * `filterout` is a set of sidedata keys that should be dropped
2683 2680 * `update` is a mapping of additional/new key -> value
2684 2681 """
2685 2682 if deltareuse not in self.DELTAREUSEALL:
2686 2683 raise ValueError(
2687 2684 _(b'value for deltareuse invalid: %s') % deltareuse
2688 2685 )
2689 2686
2690 2687 if len(destrevlog):
2691 2688 raise ValueError(_(b'destination revlog is not empty'))
2692 2689
2693 2690 if getattr(self, 'filteredrevs', None):
2694 2691 raise ValueError(_(b'source revlog has filtered revisions'))
2695 2692 if getattr(destrevlog, 'filteredrevs', None):
2696 2693 raise ValueError(_(b'destination revlog has filtered revisions'))
2697 2694
2698 2695 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2699 2696 # if possible.
2700 2697 oldlazydelta = destrevlog._lazydelta
2701 2698 oldlazydeltabase = destrevlog._lazydeltabase
2702 2699 oldamd = destrevlog._deltabothparents
2703 2700
2704 2701 try:
2705 2702 if deltareuse == self.DELTAREUSEALWAYS:
2706 2703 destrevlog._lazydeltabase = True
2707 2704 destrevlog._lazydelta = True
2708 2705 elif deltareuse == self.DELTAREUSESAMEREVS:
2709 2706 destrevlog._lazydeltabase = False
2710 2707 destrevlog._lazydelta = True
2711 2708 elif deltareuse == self.DELTAREUSENEVER:
2712 2709 destrevlog._lazydeltabase = False
2713 2710 destrevlog._lazydelta = False
2714 2711
2715 2712 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2716 2713
2717 2714 self._clone(
2718 2715 tr,
2719 2716 destrevlog,
2720 2717 addrevisioncb,
2721 2718 deltareuse,
2722 2719 forcedeltabothparents,
2723 2720 sidedatacompanion,
2724 2721 )
2725 2722
2726 2723 finally:
2727 2724 destrevlog._lazydelta = oldlazydelta
2728 2725 destrevlog._lazydeltabase = oldlazydeltabase
2729 2726 destrevlog._deltabothparents = oldamd
2730 2727
2731 2728 def _clone(
2732 2729 self,
2733 2730 tr,
2734 2731 destrevlog,
2735 2732 addrevisioncb,
2736 2733 deltareuse,
2737 2734 forcedeltabothparents,
2738 2735 sidedatacompanion,
2739 2736 ):
2740 2737 """perform the core duty of `revlog.clone` after parameter processing"""
2741 2738 deltacomputer = deltautil.deltacomputer(destrevlog)
2742 2739 index = self.index
2743 2740 for rev in self:
2744 2741 entry = index[rev]
2745 2742
2746 2743 # Some classes override linkrev to take filtered revs into
2747 2744 # account. Use raw entry from index.
2748 2745 flags = entry[0] & 0xFFFF
2749 2746 linkrev = entry[4]
2750 2747 p1 = index[entry[5]][7]
2751 2748 p2 = index[entry[6]][7]
2752 2749 node = entry[7]
2753 2750
2754 2751 sidedataactions = (False, [], {})
2755 2752 if sidedatacompanion is not None:
2756 2753 sidedataactions = sidedatacompanion(self, rev)
2757 2754
2758 2755 # (Possibly) reuse the delta from the revlog if allowed and
2759 2756 # the revlog chunk is a delta.
2760 2757 cachedelta = None
2761 2758 rawtext = None
2762 2759 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2763 2760 dropall, filterout, update = sidedataactions
2764 2761 text, sidedata = self._revisiondata(rev)
2765 2762 if dropall:
2766 2763 sidedata = {}
2767 2764 for key in filterout:
2768 2765 sidedata.pop(key, None)
2769 2766 sidedata.update(update)
2770 2767 if not sidedata:
2771 2768 sidedata = None
2772 2769 destrevlog.addrevision(
2773 2770 text,
2774 2771 tr,
2775 2772 linkrev,
2776 2773 p1,
2777 2774 p2,
2778 2775 cachedelta=cachedelta,
2779 2776 node=node,
2780 2777 flags=flags,
2781 2778 deltacomputer=deltacomputer,
2782 2779 sidedata=sidedata,
2783 2780 )
2784 2781 else:
2785 2782 if destrevlog._lazydelta:
2786 2783 dp = self.deltaparent(rev)
2787 2784 if dp != nullrev:
2788 2785 cachedelta = (dp, bytes(self._chunk(rev)))
2789 2786
2790 2787 if not cachedelta:
2791 2788 rawtext = self.rawdata(rev)
2792 2789
2793 2790 ifh = destrevlog.opener(
2794 2791 destrevlog.indexfile, b'a+', checkambig=False
2795 2792 )
2796 2793 dfh = None
2797 2794 if not destrevlog._inline:
2798 2795 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2799 2796 try:
2800 2797 destrevlog._addrevision(
2801 2798 node,
2802 2799 rawtext,
2803 2800 tr,
2804 2801 linkrev,
2805 2802 p1,
2806 2803 p2,
2807 2804 flags,
2808 2805 cachedelta,
2809 2806 ifh,
2810 2807 dfh,
2811 2808 deltacomputer=deltacomputer,
2812 2809 )
2813 2810 finally:
2814 2811 if dfh:
2815 2812 dfh.close()
2816 2813 ifh.close()
2817 2814
2818 2815 if addrevisioncb:
2819 2816 addrevisioncb(self, rev, node)
2820 2817
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

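    # Illustrative sketch (added commentary; ``rl`` and the ``state`` values
    # are hypothetical, but the keys are the ones consumed below):
    #
    #   state = {b'expectedversion': 1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       # each ``revlogproblem`` carries error/warning/node attributes
    #       ...
    #
    # ``state[b'skipread']`` is filled in for callers that want to avoid
    # re-reading revisions that already failed verification.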
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            # -------------------------------------------------------
            # flags()               | 0      | 0      | 0     | not 0
            # renamed()             | False  | True   | False | ?
            # rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
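            #
            # Added illustration (not in the original comments; the file
            # name and hashes are hypothetical): for a rename, rawtext is
            #
            #   b'\x01\ncopy: a.txt\ncopyrev: <40 hex digits>\n\x01\n' + text
            #
            # so len(read()) == L2 - LM, where LM covers the metadata plus
            # both b'\x01\n' markers.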

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
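    # Illustrative sketch (added commentary; ``rl`` is a hypothetical revlog
    # instance):
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   info[b'revisionscount']  # == len(rl)
    #   info[b'trackedsize']     # sum of rawsize() over all revisions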