rust: clean remains of direct-ffi code...
marmoute - r44975:fb16ad36 default
mercurial/ancestor.py
@@ -1,431 +1,395 @@
# ancestor.py - generic DAG ancestor algorithm for mercurial
#
# Copyright 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import heapq

from .node import nullrev
from . import (
    dagop,
    policy,
    pycompat,
)

parsers = policy.importmod('parsers')


def commonancestorsheads(pfunc, *nodes):
    """Returns a set with the heads of all common ancestors of all nodes,
    heads(::nodes[0] and ::nodes[1] and ...) .

    pfunc must return a list of parent vertices for a given vertex.
    """
    if not isinstance(nodes, set):
        nodes = set(nodes)
    if nullrev in nodes:
        return set()
    if len(nodes) <= 1:
        return nodes

    allseen = (1 << len(nodes)) - 1
    seen = [0] * (max(nodes) + 1)
    for i, n in enumerate(nodes):
        seen[n] = 1 << i
        poison = 1 << (i + 1)

    gca = set()
    interesting = len(nodes)
    nv = len(seen) - 1
    while nv >= 0 and interesting:
        v = nv
        nv -= 1
        if not seen[v]:
            continue
        sv = seen[v]
        if sv < poison:
            interesting -= 1
            if sv == allseen:
                gca.add(v)
                sv |= poison
                if v in nodes:
                    # history is linear
                    return {v}
        if sv < poison:
            for p in pfunc(v):
                sp = seen[p]
                if p == nullrev:
                    continue
                if sp == 0:
                    seen[p] = sv
                    interesting += 1
                elif sp != sv:
                    seen[p] |= sv
        else:
            for p in pfunc(v):
                if p == nullrev:
                    continue
                sp = seen[p]
                if sp and sp < poison:
                    interesting -= 1
                seen[p] = sv
    return gca


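# Illustrative sketch (not part of the original file) of driving
# commonancestorsheads with a toy DAG. The mapping below is an assumed
# stand-in for a real parent function: pfunc(v) must return the parent
# revisions of v, padded with nullrev (-1).
#
#   _parents = {0: [-1, -1], 1: [0, -1], 2: [0, -1], 3: [1, 2]}
#   pfunc = _parents.__getitem__
#   commonancestorsheads(pfunc, 1, 2)  # -> {0}: the fork point
#   commonancestorsheads(pfunc, 3, 1)  # -> {1}: linear-history short-circuit

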
def ancestors(pfunc, *orignodes):
    """
    Returns the common ancestors of a and b that are furthest from a
    root (as measured by longest path).

    pfunc must return a list of parent vertices for a given vertex.
    """

    def deepest(nodes):
        interesting = {}
        count = max(nodes) + 1
        depth = [0] * count
        seen = [0] * count
        mapping = []
        for (i, n) in enumerate(sorted(nodes)):
            depth[n] = 1
            b = 1 << i
            seen[n] = b
            interesting[b] = 1
            mapping.append((b, n))
        nv = count - 1
        while nv >= 0 and len(interesting) > 1:
            v = nv
            nv -= 1
            dv = depth[v]
            if dv == 0:
                continue
            sv = seen[v]
            for p in pfunc(v):
                if p == nullrev:
                    continue
                dp = depth[p]
                sp = seen[p]
                if dp <= dv:
                    depth[p] = dv + 1
                    if sp != sv:
                        interesting[sv] += 1
                        seen[p] = sv
                        if sp:
                            interesting[sp] -= 1
                            if interesting[sp] == 0:
                                del interesting[sp]
                elif dv == dp - 1:
                    nsp = sp | sv
                    if nsp == sp:
                        continue
                    seen[p] = nsp
                    interesting.setdefault(nsp, 0)
                    interesting[nsp] += 1
                    interesting[sp] -= 1
                    if interesting[sp] == 0:
                        del interesting[sp]
            interesting[sv] -= 1
            if interesting[sv] == 0:
                del interesting[sv]

        if len(interesting) != 1:
            return []

        k = 0
        for i in interesting:
            k |= i
        return {n for (i, n) in mapping if k & i}

    gca = commonancestorsheads(pfunc, *orignodes)

    if len(gca) <= 1:
        return gca
    return deepest(gca)


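# Illustrative sketch (assumed toy DAG, not from the original file) of how
# ancestors() behaves on a criss-cross merge, where there is more than one
# head of common ancestors:
#
#   _parents = {0: [-1, -1], 1: [0, -1], 2: [0, -1], 3: [1, 2], 4: [1, 2]}
#   pfunc = _parents.__getitem__
#   ancestors(pfunc, 3, 4)  # -> {1, 2}: both GCA heads are equally deep,
#                           #    so deepest() keeps both of them

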
class incrementalmissingancestors(object):
    '''persistent state used to calculate missing ancestors incrementally

    Although similar in spirit to lazyancestors below, this is a separate class
    because trying to support contains and missingancestors operations with the
    same internal data structures adds needless complexity.'''

    def __init__(self, pfunc, bases):
        self.bases = set(bases)
        if not self.bases:
            self.bases.add(nullrev)
        self.pfunc = pfunc

    def hasbases(self):
        '''whether the common set has any non-trivial bases'''
        return self.bases and self.bases != {nullrev}

    def addbases(self, newbases):
        '''grow the ancestor set by adding new bases'''
        self.bases.update(newbases)

    def basesheads(self):
        return dagop.headrevs(self.bases, self.pfunc)

    def removeancestorsfrom(self, revs):
        '''remove all ancestors of bases from the set revs (in place)'''
        bases = self.bases
        pfunc = self.pfunc
        revs.difference_update(bases)
        # nullrev is always an ancestor
        revs.discard(nullrev)
        if not revs:
            return
        # anything in revs > start is definitely not an ancestor of bases
        # revs <= start needs to be investigated
        start = max(bases)
        keepcount = sum(1 for r in revs if r > start)
        if len(revs) == keepcount:
            # no revs to consider
            return

        for curr in pycompat.xrange(start, min(revs) - 1, -1):
            if curr not in bases:
                continue
            revs.discard(curr)
            bases.update(pfunc(curr))
            if len(revs) == keepcount:
                # no more potential revs to discard
                break

    def missingancestors(self, revs):
        '''return all the ancestors of revs that are not ancestors of self.bases

        This may include elements from revs.

        Equivalent to the revset (::revs - ::self.bases). Revs are returned in
        revision number order, which is a topological order.'''
        revsvisit = set(revs)
        basesvisit = self.bases
        pfunc = self.pfunc
        bothvisit = revsvisit.intersection(basesvisit)
        revsvisit.difference_update(bothvisit)
        if not revsvisit:
            return []

        start = max(max(revsvisit), max(basesvisit))
        # At this point, we hold the invariants that:
        # - revsvisit is the set of nodes we know are an ancestor of at least
        #   one of the nodes in revs
        # - basesvisit is the same for bases
        # - bothvisit is the set of nodes we know are ancestors of at least one
        #   of the nodes in revs and one of the nodes in bases. bothvisit and
        #   revsvisit are mutually exclusive, but bothvisit is a subset of
        #   basesvisit.
        # Now we walk down in reverse topo order, adding parents of nodes
        # already visited to the sets while maintaining the invariants. When a
        # node is found in both revsvisit and basesvisit, it is removed from
        # revsvisit and added to bothvisit. When revsvisit becomes empty, there
        # are no more ancestors of revs that aren't also ancestors of bases, so
        # exit.

        missing = []
        for curr in pycompat.xrange(start, nullrev, -1):
            if not revsvisit:
                break

            if curr in bothvisit:
                bothvisit.remove(curr)
                # curr's parents might have made it into revsvisit through
                # another path
                for p in pfunc(curr):
                    revsvisit.discard(p)
                    basesvisit.add(p)
                    bothvisit.add(p)
                continue

            if curr in revsvisit:
                missing.append(curr)
                revsvisit.remove(curr)
                thisvisit = revsvisit
                othervisit = basesvisit
            elif curr in basesvisit:
                thisvisit = basesvisit
                othervisit = revsvisit
            else:
                # not an ancestor of revs or bases: ignore
                continue

            for p in pfunc(curr):
                if p == nullrev:
                    pass
                elif p in othervisit or p in bothvisit:
                    # p is implicitly in thisvisit. This means p is or should be
                    # in bothvisit
                    revsvisit.discard(p)
                    basesvisit.add(p)
                    bothvisit.add(p)
                else:
                    # visit later
                    thisvisit.add(p)

        missing.reverse()
        return missing


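# Illustrative sketch (assumed toy DAG, not from the original file) of
# incremental use, with pfunc(v) returning the parents of v:
#
#   _parents = {0: [-1, -1], 1: [0, -1], 2: [1, -1], 3: [1, -1]}
#   inc = incrementalmissingancestors(_parents.__getitem__, [2])
#   inc.missingancestors([3])  # -> [3]: ::3 - ::2 leaves only rev 3
#   revs = {0, 3}
#   inc.removeancestorsfrom(revs)  # revs becomes {3}; 0 is an ancestor of 2

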
# Extracted from lazyancestors.__iter__ to avoid a reference cycle
def _lazyancestorsiter(parentrevs, initrevs, stoprev, inclusive):
    seen = {nullrev}
    heappush = heapq.heappush
    heappop = heapq.heappop
    heapreplace = heapq.heapreplace
    see = seen.add

    if inclusive:
        visit = [-r for r in initrevs]
        seen.update(initrevs)
        heapq.heapify(visit)
    else:
        visit = []
        heapq.heapify(visit)
        for r in initrevs:
            p1, p2 = parentrevs(r)
            if p1 not in seen:
                heappush(visit, -p1)
                see(p1)
            if p2 not in seen:
                heappush(visit, -p2)
                see(p2)

    while visit:
        current = -visit[0]
        if current < stoprev:
            break
        yield current
        # optimize out heapq operation if p1 is known to be the next highest
        # revision, which is quite common in linear history.
        p1, p2 = parentrevs(current)
        if p1 not in seen:
            if current - p1 == 1:
                visit[0] = -p1
            else:
                heapreplace(visit, -p1)
            see(p1)
        else:
            heappop(visit)
        if p2 not in seen:
            heappush(visit, -p2)
            see(p2)


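# Illustrative note (not part of the original file): heapq only provides a
# min-heap, so revisions are stored negated to pop the *largest* pending
# revision first. A sketch with toy values, independent of any repository:
#
#   import heapq
#   visit = [-5, -3, -9]
#   heapq.heapify(visit)
#   -visit[0]  # -> 9: the highest revision is always at the heap top

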
class lazyancestors(object):
    def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
        """Create a new object generating ancestors for the given revs. Does
        not generate revs lower than stoprev.

        This is computed lazily starting from revs. The object supports
        iteration and membership.

        pfunc must return the parent revisions of a given revision and revs
        should be an iterable. inclusive is a boolean that indicates whether
        revs should be included. Revs lower than stoprev will not be
        generated.

        Result does not include the null revision."""
        self._parentrevs = pfunc
        self._initrevs = [r for r in revs if r >= stoprev]
        self._stoprev = stoprev
        self._inclusive = inclusive

        self._containsseen = set()
        self._containsiter = _lazyancestorsiter(
            self._parentrevs, self._initrevs, self._stoprev, self._inclusive
        )

    def __nonzero__(self):
        """False if the set is empty, True otherwise."""
        try:
            next(iter(self))
            return True
        except StopIteration:
            return False

    __bool__ = __nonzero__

    def __iter__(self):
        """Generate the ancestors of _initrevs in reverse topological order.

        If inclusive is False, yield a sequence of revision numbers starting
        with the parents of each revision in revs, i.e., each revision is
        *not* considered an ancestor of itself. Results are emitted in reverse
        revision number order. That order is also topological: a child is
        always emitted before its parent.

        If inclusive is True, the source revisions are also yielded. The
        reverse revision number order is still enforced."""
        return _lazyancestorsiter(
            self._parentrevs, self._initrevs, self._stoprev, self._inclusive
        )

    def __contains__(self, target):
        """Test whether target is an ancestor of self._initrevs."""
        seen = self._containsseen
        if target in seen:
            return True
        iter = self._containsiter
        if iter is None:
            # Iterator exhausted
            return False
        # Only integer target is valid, but some callers expect 'None in self'
        # to be False. So we explicitly allow it.
        if target is None:
            return False

        see = seen.add
        try:
            while True:
                rev = next(iter)
                see(rev)
                if rev == target:
                    return True
                if rev < target:
                    return False
        except StopIteration:
            # Set to None to indicate fast-path can be used next time, and to
            # free up memory.
            self._containsiter = None
            return False
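

# Illustrative sketch (assumed toy DAG, not from the original file) of
# lazyancestors; parentrevs(r) returns a (p1, p2) pair padded with nullrev:
#
#   _parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
#   la = lazyancestors(_parents.__getitem__, [3])
#   list(la)  # -> [1, 0]: parents first, since inclusive defaults to False
#   3 in la   # -> False; 1 in la -> True
#             #    (membership lazily consumes the shared iterator)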


class rustlazyancestors(object):
    def __init__(self, index, revs, stoprev=0, inclusive=False):
        self._index = index
        self._stoprev = stoprev
        self._inclusive = inclusive
        # no need to prefilter out init revs that are smaller than stoprev,
        # it's done by the rustlazyancestors constructor.
        # we need to convert to a list, because our rustlazyancestors
        # constructor (from C code) doesn't understand anything else yet
        self._initrevs = initrevs = list(revs)

        self._containsiter = parsers.rustlazyancestors(
            index, initrevs, stoprev, inclusive
        )

    def __nonzero__(self):
        """False if the set is empty, True otherwise.

        It's better to duplicate this essentially trivial method than
        to subclass lazyancestors
        """
        try:
            next(iter(self))
            return True
        except StopIteration:
            return False

    def __iter__(self):
        return parsers.rustlazyancestors(
            self._index, self._initrevs, self._stoprev, self._inclusive
        )

    def __contains__(self, target):
        return target in self._containsiter
mercurial/revlog.py
@@ -1,3038 +1,3035 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


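# Illustrative sketch (not part of the original file): the first index
# field packs a 48-bit data-file offset with 16 bits of flags, and the
# helpers above split it back apart (toy values):
#
#   q = offset_type(1024, 0)  # -> 1024 << 16
#   getoffset(q)              # -> 1024
#   gettype(q)                # -> 0

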
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack


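# Illustrative note (not part of the original file): b">4l20s20s20s" is
# four big-endian 32-bit ints plus three 20-byte nodeids, so every v0
# index record is a fixed 76 bytes:
#
#   import struct
#   struct.Struct(b">4l20s20s20s").size  # -> 76 (4 * 4 + 3 * 20)

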
class revlogoldindex(list):
    @property
    def nodemap(self):
        msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self._nodemap

    @util.propertycache
    def _nodemap(self):
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        for r in range(0, len(self)):
            n = self[r][7]
            nodemap[n] = r
        return nodemap

    def has_node(self, node):
        """return True if the node exist in the index"""
        return node in self._nodemap

    def rev(self, node):
        """return a revision for a node

        If the node is unknown, raise a RevlogError"""
        return self._nodemap[node]

    def get_rev(self, node):
        """return a revision for a node

        If the node is unknown, return None"""
        return self._nodemap.get(node)

    def append(self, tup):
        self._nodemap[tup[7]] = len(self)
        super(revlogoldindex, self).append(tup)

    def __delitem__(self, i):
        if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
            raise ValueError(b"deleting slices only supports a:-1 with step 1")
        for r in pycompat.xrange(i.start, len(self)):
            del self._nodemap[self[r][7]]
        super(revlogoldindex, self).__delitem__(i)

    def clearcaches(self):
        self.__dict__.pop('_nodemap', None)

    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = nodemaputil.NodeMap({nullid: nullrev})
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        index = revlogoldindex(index)
        return index, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


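# Illustrative note (not part of the original file): b">Qiiiiii20s12x"
# packs the 6-byte offset and 2-byte flags into one 64-bit field, six
# 32-bit ints, a 20-byte nodeid, and 12 bytes of padding, giving a fixed
# 64-byte v1 record:
#
#   import struct
#   struct.Struct(b">Qiiiiii20s12x").size  # -> 64 (8 + 6 * 4 + 20 + 12)

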
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p


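# Illustrative note (not part of the original file) on packentry's
# rev == 0 special case: the revlog has no separate header, so the
# version+flags word overwrites the first four bytes of the first index
# entry (the high half of the offset field, which is always zero for
# rev 0). A sketch with toy values:
#
#   entry = (offset_type(0, 0), 10, 20, 0, 0, -1, -1, b'\x00' * 20)
#   p = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
#   versionformat_unpack(p[:4])[0] == REVLOG_DEFAULT_VERSION  # -> True

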
NodemapRevlogIO = None

if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    class NodemapRevlogIO(revlogio):
        """A debug-oriented IO class that returns a PersistentNodeMapIndexObject

        The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
        """

        def parseindex(self, data, inline):
            index, cache = parsers.parse_index_devel_nodemap(data, inline)
            return index, cache


class rustrevlogio(revlogio):
    def parseindex(self, data, inline):
        index, cache = super(rustrevlogio, self).parseindex(data, inline)
        return rustrevlog.MixedIndex(index), cache


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = indexfile[:-2] + b".n"

        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and NodemapRevlogIO is not None
        )

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        elif devel_nodemap:
            self._io = NodemapRevlogIO()
        elif rustrevlog is not None and self.opener.options.get(b'rust.index'):
            self._io = rustrevlogio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    self._nodemap_docket = nodemap_data[0]
                    index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
662 args['checkambig'] = self._checkambig
662 args['checkambig'] = self._checkambig
663 if mode == b'w':
663 if mode == b'w':
664 args['atomictemp'] = True
664 args['atomictemp'] = True
665 return self.opener(self.indexfile, **args)
665 return self.opener(self.indexfile, **args)
666
666
667 def _datafp(self, mode=b'r'):
667 def _datafp(self, mode=b'r'):
668 """file object for the revlog's data file"""
668 """file object for the revlog's data file"""
669 return self.opener(self.datafile, mode=mode)
669 return self.opener(self.datafile, mode=mode)
670
670
671 @contextlib.contextmanager
671 @contextlib.contextmanager
672 def _datareadfp(self, existingfp=None):
672 def _datareadfp(self, existingfp=None):
673 """file object suitable to read data"""
673 """file object suitable to read data"""
674 # Use explicit file handle, if given.
674 # Use explicit file handle, if given.
675 if existingfp is not None:
675 if existingfp is not None:
676 yield existingfp
676 yield existingfp
677
677
678 # Use a file handle being actively used for writes, if available.
678 # Use a file handle being actively used for writes, if available.
679 # There is some danger to doing this because reads will seek the
679 # There is some danger to doing this because reads will seek the
680 # file. However, _writeentry() performs a SEEK_END before all writes,
680 # file. However, _writeentry() performs a SEEK_END before all writes,
681 # so we should be safe.
681 # so we should be safe.
682 elif self._writinghandles:
682 elif self._writinghandles:
683 if self._inline:
683 if self._inline:
684 yield self._writinghandles[0]
684 yield self._writinghandles[0]
685 else:
685 else:
686 yield self._writinghandles[1]
686 yield self._writinghandles[1]
687
687
688 # Otherwise open a new file handle.
688 # Otherwise open a new file handle.
689 else:
689 else:
690 if self._inline:
690 if self._inline:
691 func = self._indexfp
691 func = self._indexfp
692 else:
692 else:
693 func = self._datafp
693 func = self._datafp
694 with func() as fp:
694 with func() as fp:
695 yield fp
695 yield fp
696
696
697 def tiprev(self):
697 def tiprev(self):
698 return len(self.index) - 1
698 return len(self.index) - 1
699
699
700 def tip(self):
700 def tip(self):
701 return self.node(self.tiprev())
701 return self.node(self.tiprev())
702
702
703 def __contains__(self, rev):
703 def __contains__(self, rev):
704 return 0 <= rev < len(self)
704 return 0 <= rev < len(self)
705
705
706 def __len__(self):
706 def __len__(self):
707 return len(self.index)
707 return len(self.index)
708
708
709 def __iter__(self):
709 def __iter__(self):
710 return iter(pycompat.xrange(len(self)))
710 return iter(pycompat.xrange(len(self)))
711
711
712 def revs(self, start=0, stop=None):
712 def revs(self, start=0, stop=None):
713 """iterate over all rev in this revlog (from start to stop)"""
713 """iterate over all rev in this revlog (from start to stop)"""
714 return storageutil.iterrevs(len(self), start=start, stop=stop)
714 return storageutil.iterrevs(len(self), start=start, stop=stop)
715
715
716 @property
716 @property
717 def nodemap(self):
717 def nodemap(self):
718 msg = (
718 msg = (
719 b"revlog.nodemap is deprecated, "
719 b"revlog.nodemap is deprecated, "
720 b"use revlog.index.[has_node|rev|get_rev]"
720 b"use revlog.index.[has_node|rev|get_rev]"
721 )
721 )
722 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
722 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
723 return self.index.nodemap
723 return self.index.nodemap
724
724
725 @property
725 @property
726 def _nodecache(self):
726 def _nodecache(self):
727 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
727 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
728 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
728 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
729 return self.index.nodemap
729 return self.index.nodemap
730
730
731 def hasnode(self, node):
731 def hasnode(self, node):
732 try:
732 try:
733 self.rev(node)
733 self.rev(node)
734 return True
734 return True
735 except KeyError:
735 except KeyError:
736 return False
736 return False
737
737
738 def candelta(self, baserev, rev):
738 def candelta(self, baserev, rev):
739 """whether two revisions (baserev, rev) can be delta-ed or not"""
739 """whether two revisions (baserev, rev) can be delta-ed or not"""
740 # Disable delta if either rev requires a content-changing flag
740 # Disable delta if either rev requires a content-changing flag
741 # processor (ex. LFS). This is because such a flag processor can alter
741 # processor (ex. LFS). This is because such a flag processor can alter
742 # the rawtext content that the delta will be based on, and two clients
742 # the rawtext content that the delta will be based on, and two clients
743 # could have the same revlog node with different flags (i.e. different
743 # could have the same revlog node with different flags (i.e. different
744 # rawtext contents) and the delta could be incompatible.
744 # rawtext contents) and the delta could be incompatible.
745 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
745 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
746 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
746 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
747 ):
747 ):
748 return False
748 return False
749 return True
749 return True
750
750
751 def update_caches(self, transaction):
751 def update_caches(self, transaction):
752 if self.nodemap_file is not None:
752 if self.nodemap_file is not None:
753 if transaction is None:
753 if transaction is None:
754 nodemaputil.update_persistent_nodemap(self)
754 nodemaputil.update_persistent_nodemap(self)
755 else:
755 else:
756 nodemaputil.setup_persistent_nodemap(transaction, self)
756 nodemaputil.setup_persistent_nodemap(transaction, self)
757
757
758 def clearcaches(self):
758 def clearcaches(self):
759 self._revisioncache = None
759 self._revisioncache = None
760 self._chainbasecache.clear()
760 self._chainbasecache.clear()
761 self._chunkcache = (0, b'')
761 self._chunkcache = (0, b'')
762 self._pcache = {}
762 self._pcache = {}
763 self.index.clearcaches()
763 self.index.clearcaches()
764
764
765 def rev(self, node):
765 def rev(self, node):
766 try:
766 try:
767 return self.index.rev(node)
767 return self.index.rev(node)
768 except TypeError:
768 except TypeError:
769 raise
769 raise
770 except error.RevlogError:
770 except error.RevlogError:
771 # parsers.c radix tree lookup failed
771 # parsers.c radix tree lookup failed
772 if node == wdirid or node in wdirfilenodeids:
772 if node == wdirid or node in wdirfilenodeids:
773 raise error.WdirUnsupported
773 raise error.WdirUnsupported
774 raise error.LookupError(node, self.indexfile, _(b'no node'))
774 raise error.LookupError(node, self.indexfile, _(b'no node'))
775
775
776 # Accessors for index entries.
776 # Accessors for index entries.
777
777
778 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
778 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
779 # are flags.
779 # are flags.
780 def start(self, rev):
780 def start(self, rev):
781 return int(self.index[rev][0] >> 16)
781 return int(self.index[rev][0] >> 16)
782
782
783 def flags(self, rev):
783 def flags(self, rev):
784 return self.index[rev][0] & 0xFFFF
784 return self.index[rev][0] & 0xFFFF
785
785
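# Editor's note: an illustrative round-trip for the two accessors above.
# The first index field packs (offset << 16) | flags into one integer;
# start() and flags() are the two halves of that unpacking. Names here
# are illustrative, not part of revlog.
def _pack_offset_flags(offset, flags):
    return (offset << 16) | flags

entry0 = _pack_offset_flags(12345, 0x0002)
assert entry0 >> 16 == 12345      # what start() returns
assert entry0 & 0xFFFF == 0x0002  # what flags() returns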
786 def length(self, rev):
786 def length(self, rev):
787 return self.index[rev][1]
787 return self.index[rev][1]
788
788
789 def rawsize(self, rev):
789 def rawsize(self, rev):
790 """return the length of the uncompressed text for a given revision"""
790 """return the length of the uncompressed text for a given revision"""
791 l = self.index[rev][2]
791 l = self.index[rev][2]
792 if l >= 0:
792 if l >= 0:
793 return l
793 return l
794
794
795 t = self.rawdata(rev)
795 t = self.rawdata(rev)
796 return len(t)
796 return len(t)
797
797
798 def size(self, rev):
798 def size(self, rev):
799 """length of non-raw text (processed by a "read" flag processor)"""
799 """length of non-raw text (processed by a "read" flag processor)"""
800 # fast path: if no "read" flag processor could change the content,
800 # fast path: if no "read" flag processor could change the content,
801 # size is rawsize. note: ELLIPSIS is known to not change the content.
801 # size is rawsize. note: ELLIPSIS is known to not change the content.
802 flags = self.flags(rev)
802 flags = self.flags(rev)
803 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
803 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
804 return self.rawsize(rev)
804 return self.rawsize(rev)
805
805
806 return len(self.revision(rev, raw=False))
806 return len(self.revision(rev, raw=False))
807
807
808 def chainbase(self, rev):
808 def chainbase(self, rev):
809 base = self._chainbasecache.get(rev)
809 base = self._chainbasecache.get(rev)
810 if base is not None:
810 if base is not None:
811 return base
811 return base
812
812
813 index = self.index
813 index = self.index
814 iterrev = rev
814 iterrev = rev
815 base = index[iterrev][3]
815 base = index[iterrev][3]
816 while base != iterrev:
816 while base != iterrev:
817 iterrev = base
817 iterrev = base
818 base = index[iterrev][3]
818 base = index[iterrev][3]
819
819
820 self._chainbasecache[rev] = base
820 self._chainbasecache[rev] = base
821 return base
821 return base
822
822
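# Editor's note: a toy run of the base-chasing loop above, with a fake
# mapping standing in for index[rev][3]. Revision 0 is its own base (a
# full snapshot); 2 deltas against 1, which deltas against 0.
fake_base = {0: 0, 1: 0, 2: 1}

def toy_chainbase(rev):
    base = fake_base[rev]
    while base != rev:
        rev = base
        base = fake_base[rev]
    return base

assert toy_chainbase(2) == 0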
823 def linkrev(self, rev):
823 def linkrev(self, rev):
824 return self.index[rev][4]
824 return self.index[rev][4]
825
825
826 def parentrevs(self, rev):
826 def parentrevs(self, rev):
827 try:
827 try:
828 entry = self.index[rev]
828 entry = self.index[rev]
829 except IndexError:
829 except IndexError:
830 if rev == wdirrev:
830 if rev == wdirrev:
831 raise error.WdirUnsupported
831 raise error.WdirUnsupported
832 raise
832 raise
833
833
834 return entry[5], entry[6]
834 return entry[5], entry[6]
835
835
836 # fast parentrevs(rev) where rev isn't filtered
836 # fast parentrevs(rev) where rev isn't filtered
837 _uncheckedparentrevs = parentrevs
837 _uncheckedparentrevs = parentrevs
838
838
839 def node(self, rev):
839 def node(self, rev):
840 try:
840 try:
841 return self.index[rev][7]
841 return self.index[rev][7]
842 except IndexError:
842 except IndexError:
843 if rev == wdirrev:
843 if rev == wdirrev:
844 raise error.WdirUnsupported
844 raise error.WdirUnsupported
845 raise
845 raise
846
846
847 # Derived from index values.
847 # Derived from index values.
848
848
849 def end(self, rev):
849 def end(self, rev):
850 return self.start(rev) + self.length(rev)
850 return self.start(rev) + self.length(rev)
851
851
852 def parents(self, node):
852 def parents(self, node):
853 i = self.index
853 i = self.index
854 d = i[self.rev(node)]
854 d = i[self.rev(node)]
855 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
855 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
856
856
857 def chainlen(self, rev):
857 def chainlen(self, rev):
858 return self._chaininfo(rev)[0]
858 return self._chaininfo(rev)[0]
859
859
860 def _chaininfo(self, rev):
860 def _chaininfo(self, rev):
861 chaininfocache = self._chaininfocache
861 chaininfocache = self._chaininfocache
862 if rev in chaininfocache:
862 if rev in chaininfocache:
863 return chaininfocache[rev]
863 return chaininfocache[rev]
864 index = self.index
864 index = self.index
865 generaldelta = self._generaldelta
865 generaldelta = self._generaldelta
866 iterrev = rev
866 iterrev = rev
867 e = index[iterrev]
867 e = index[iterrev]
868 clen = 0
868 clen = 0
869 compresseddeltalen = 0
869 compresseddeltalen = 0
870 while iterrev != e[3]:
870 while iterrev != e[3]:
871 clen += 1
871 clen += 1
872 compresseddeltalen += e[1]
872 compresseddeltalen += e[1]
873 if generaldelta:
873 if generaldelta:
874 iterrev = e[3]
874 iterrev = e[3]
875 else:
875 else:
876 iterrev -= 1
876 iterrev -= 1
877 if iterrev in chaininfocache:
877 if iterrev in chaininfocache:
878 t = chaininfocache[iterrev]
878 t = chaininfocache[iterrev]
879 clen += t[0]
879 clen += t[0]
880 compresseddeltalen += t[1]
880 compresseddeltalen += t[1]
881 break
881 break
882 e = index[iterrev]
882 e = index[iterrev]
883 else:
883 else:
884 # Add text length of base since decompressing that also takes
884 # Add text length of base since decompressing that also takes
885 # work. For cache hits the length is already included.
885 # work. For cache hits the length is already included.
886 compresseddeltalen += e[1]
886 compresseddeltalen += e[1]
887 r = (clen, compresseddeltalen)
887 r = (clen, compresseddeltalen)
888 chaininfocache[rev] = r
888 chaininfocache[rev] = r
889 return r
889 return r
890
890
891 def _deltachain(self, rev, stoprev=None):
891 def _deltachain(self, rev, stoprev=None):
892 """Obtain the delta chain for a revision.
892 """Obtain the delta chain for a revision.
893
893
894 ``stoprev`` specifies a revision to stop at. If not specified, we
894 ``stoprev`` specifies a revision to stop at. If not specified, we
895 stop at the base of the chain.
895 stop at the base of the chain.
896
896
897 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
897 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
898 revs in ascending order and ``stopped`` is a bool indicating whether
898 revs in ascending order and ``stopped`` is a bool indicating whether
899 ``stoprev`` was hit.
899 ``stoprev`` was hit.
900 """
900 """
901 # Try C implementation.
901 # Try C implementation.
902 try:
902 try:
903 return self.index.deltachain(rev, stoprev, self._generaldelta)
903 return self.index.deltachain(rev, stoprev, self._generaldelta)
904 except AttributeError:
904 except AttributeError:
905 pass
905 pass
906
906
907 chain = []
907 chain = []
908
908
909 # Alias to prevent attribute lookup in tight loop.
909 # Alias to prevent attribute lookup in tight loop.
910 index = self.index
910 index = self.index
911 generaldelta = self._generaldelta
911 generaldelta = self._generaldelta
912
912
913 iterrev = rev
913 iterrev = rev
914 e = index[iterrev]
914 e = index[iterrev]
915 while iterrev != e[3] and iterrev != stoprev:
915 while iterrev != e[3] and iterrev != stoprev:
916 chain.append(iterrev)
916 chain.append(iterrev)
917 if generaldelta:
917 if generaldelta:
918 iterrev = e[3]
918 iterrev = e[3]
919 else:
919 else:
920 iterrev -= 1
920 iterrev -= 1
921 e = index[iterrev]
921 e = index[iterrev]
922
922
923 if iterrev == stoprev:
923 if iterrev == stoprev:
924 stopped = True
924 stopped = True
925 else:
925 else:
926 chain.append(iterrev)
926 chain.append(iterrev)
927 stopped = False
927 stopped = False
928
928
929 chain.reverse()
929 chain.reverse()
930 return chain, stopped
930 return chain, stopped
931
931
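# Editor's note: a toy version of the pure-Python walk above, assuming
# generaldelta; `bases` maps rev -> delta base and is illustrative.
# Note that stoprev itself is excluded from the returned chain.
bases = {0: 0, 1: 0, 2: 1, 3: 2}

def toy_deltachain(rev, stoprev=None):
    chain = []
    while rev != bases[rev] and rev != stoprev:
        chain.append(rev)
        rev = bases[rev]
    stopped = rev == stoprev
    if not stopped:
        chain.append(rev)
    chain.reverse()
    return chain, stopped

assert toy_deltachain(3) == ([0, 1, 2, 3], False)
assert toy_deltachain(3, stoprev=1) == ([2, 3], True)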
932 def ancestors(self, revs, stoprev=0, inclusive=False):
932 def ancestors(self, revs, stoprev=0, inclusive=False):
933 """Generate the ancestors of 'revs' in reverse revision order.
933 """Generate the ancestors of 'revs' in reverse revision order.
934 Does not generate revs lower than stoprev.
934 Does not generate revs lower than stoprev.
935
935
936 See the documentation for ancestor.lazyancestors for more details."""
936 See the documentation for ancestor.lazyancestors for more details."""
937
937
938 # first, make sure start revisions aren't filtered
938 # first, make sure start revisions aren't filtered
939 revs = list(revs)
939 revs = list(revs)
940 checkrev = self.node
940 checkrev = self.node
941 for r in revs:
941 for r in revs:
942 checkrev(r)
942 checkrev(r)
943 # and we're sure ancestors aren't filtered as well
943 # and we're sure ancestors aren't filtered as well
944
944
945 if rustancestor is not None:
945 if rustancestor is not None:
946 lazyancestors = rustancestor.LazyAncestors
946 lazyancestors = rustancestor.LazyAncestors
947 arg = self.index
947 arg = self.index
948 elif util.safehasattr(parsers, b'rustlazyancestors'):
949 lazyancestors = ancestor.rustlazyancestors
950 arg = self.index
951 else:
948 else:
952 lazyancestors = ancestor.lazyancestors
949 lazyancestors = ancestor.lazyancestors
953 arg = self._uncheckedparentrevs
950 arg = self._uncheckedparentrevs
954 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
951 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
955
952
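# Editor's note: a minimal eager analogue of the lazyancestors walk
# selected above (ordering ignored for brevity). pfunc plays the role
# of _uncheckedparentrevs; the tiny DAG and names are illustrative.
def eager_ancestors(pfunc, revs, stoprev=0, inclusive=False):
    seen = set(revs) if inclusive else set()
    visit = list(revs)
    while visit:
        for p in pfunc(visit.pop()):
            if p != -1 and p >= stoprev and p not in seen:
                seen.add(p)
                visit.append(p)
    return seen

toy_parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}  # -1 is nullrev
assert eager_ancestors(toy_parents.get, [2]) == {0, 1}
assert eager_ancestors(toy_parents.get, [2], inclusive=True) == {0, 1, 2}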
956 def descendants(self, revs):
953 def descendants(self, revs):
957 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
954 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
958
955
959 def findcommonmissing(self, common=None, heads=None):
956 def findcommonmissing(self, common=None, heads=None):
960 """Return a tuple of the ancestors of common and the ancestors of heads
957 """Return a tuple of the ancestors of common and the ancestors of heads
961 that are not ancestors of common. In revset terminology, we return the
958 that are not ancestors of common. In revset terminology, we return the
962 tuple:
959 tuple:
963
960
964 ::common, (::heads) - (::common)
961 ::common, (::heads) - (::common)
965
962
966 The list is sorted by revision number, meaning it is
963 The list is sorted by revision number, meaning it is
967 topologically sorted.
964 topologically sorted.
968
965
969 'heads' and 'common' are both lists of node IDs. If heads is
966 'heads' and 'common' are both lists of node IDs. If heads is
970 not supplied, uses all of the revlog's heads. If common is not
967 not supplied, uses all of the revlog's heads. If common is not
971 supplied, uses nullid."""
968 supplied, uses nullid."""
972 if common is None:
969 if common is None:
973 common = [nullid]
970 common = [nullid]
974 if heads is None:
971 if heads is None:
975 heads = self.heads()
972 heads = self.heads()
976
973
977 common = [self.rev(n) for n in common]
974 common = [self.rev(n) for n in common]
978 heads = [self.rev(n) for n in heads]
975 heads = [self.rev(n) for n in heads]
979
976
980 # we want the ancestors, but inclusive
977 # we want the ancestors, but inclusive
981 class lazyset(object):
978 class lazyset(object):
982 def __init__(self, lazyvalues):
979 def __init__(self, lazyvalues):
983 self.addedvalues = set()
980 self.addedvalues = set()
984 self.lazyvalues = lazyvalues
981 self.lazyvalues = lazyvalues
985
982
986 def __contains__(self, value):
983 def __contains__(self, value):
987 return value in self.addedvalues or value in self.lazyvalues
984 return value in self.addedvalues or value in self.lazyvalues
988
985
989 def __iter__(self):
986 def __iter__(self):
990 added = self.addedvalues
987 added = self.addedvalues
991 for r in added:
988 for r in added:
992 yield r
989 yield r
993 for r in self.lazyvalues:
990 for r in self.lazyvalues:
994 if r not in added:
991 if r not in added:
995 yield r
992 yield r
996
993
997 def add(self, value):
994 def add(self, value):
998 self.addedvalues.add(value)
995 self.addedvalues.add(value)
999
996
1000 def update(self, values):
997 def update(self, values):
1001 self.addedvalues.update(values)
998 self.addedvalues.update(values)
1002
999
1003 has = lazyset(self.ancestors(common))
1000 has = lazyset(self.ancestors(common))
1004 has.add(nullrev)
1001 has.add(nullrev)
1005 has.update(common)
1002 has.update(common)
1006
1003
1007 # take all ancestors from heads that aren't in has
1004 # take all ancestors from heads that aren't in has
1008 missing = set()
1005 missing = set()
1009 visit = collections.deque(r for r in heads if r not in has)
1006 visit = collections.deque(r for r in heads if r not in has)
1010 while visit:
1007 while visit:
1011 r = visit.popleft()
1008 r = visit.popleft()
1012 if r in missing:
1009 if r in missing:
1013 continue
1010 continue
1014 else:
1011 else:
1015 missing.add(r)
1012 missing.add(r)
1016 for p in self.parentrevs(r):
1013 for p in self.parentrevs(r):
1017 if p not in has:
1014 if p not in has:
1018 visit.append(p)
1015 visit.append(p)
1019 missing = list(missing)
1016 missing = list(missing)
1020 missing.sort()
1017 missing.sort()
1021 return has, [self.node(miss) for miss in missing]
1018 return has, [self.node(miss) for miss in missing]
1022
1019
1023 def incrementalmissingrevs(self, common=None):
1020 def incrementalmissingrevs(self, common=None):
1024 """Return an object that can be used to incrementally compute the
1021 """Return an object that can be used to incrementally compute the
1025 revision numbers of the ancestors of arbitrary sets that are not
1022 revision numbers of the ancestors of arbitrary sets that are not
1026 ancestors of common. This is an ancestor.incrementalmissingancestors
1023 ancestors of common. This is an ancestor.incrementalmissingancestors
1027 object.
1024 object.
1028
1025
1029 'common' is a list of revision numbers. If common is not supplied, uses
1026 'common' is a list of revision numbers. If common is not supplied, uses
1030 nullrev.
1027 nullrev.
1031 """
1028 """
1032 if common is None:
1029 if common is None:
1033 common = [nullrev]
1030 common = [nullrev]
1034
1031
1035 if rustancestor is not None:
1032 if rustancestor is not None:
1036 return rustancestor.MissingAncestors(self.index, common)
1033 return rustancestor.MissingAncestors(self.index, common)
1037 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1034 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1038
1035
1039 def findmissingrevs(self, common=None, heads=None):
1036 def findmissingrevs(self, common=None, heads=None):
1040 """Return the revision numbers of the ancestors of heads that
1037 """Return the revision numbers of the ancestors of heads that
1041 are not ancestors of common.
1038 are not ancestors of common.
1042
1039
1043 More specifically, return a list of revision numbers corresponding to
1040 More specifically, return a list of revision numbers corresponding to
1044 nodes N such that every N satisfies the following constraints:
1041 nodes N such that every N satisfies the following constraints:
1045
1042
1046 1. N is an ancestor of some node in 'heads'
1043 1. N is an ancestor of some node in 'heads'
1047 2. N is not an ancestor of any node in 'common'
1044 2. N is not an ancestor of any node in 'common'
1048
1045
1049 The list is sorted by revision number, meaning it is
1046 The list is sorted by revision number, meaning it is
1050 topologically sorted.
1047 topologically sorted.
1051
1048
1052 'heads' and 'common' are both lists of revision numbers. If heads is
1049 'heads' and 'common' are both lists of revision numbers. If heads is
1053 not supplied, uses all of the revlog's heads. If common is not
1050 not supplied, uses all of the revlog's heads. If common is not
1054 supplied, uses nullid."""
1051 supplied, uses nullid."""
1055 if common is None:
1052 if common is None:
1056 common = [nullrev]
1053 common = [nullrev]
1057 if heads is None:
1054 if heads is None:
1058 heads = self.headrevs()
1055 heads = self.headrevs()
1059
1056
1060 inc = self.incrementalmissingrevs(common=common)
1057 inc = self.incrementalmissingrevs(common=common)
1061 return inc.missingancestors(heads)
1058 return inc.missingancestors(heads)
1062
1059
1063 def findmissing(self, common=None, heads=None):
1060 def findmissing(self, common=None, heads=None):
1064 """Return the ancestors of heads that are not ancestors of common.
1061 """Return the ancestors of heads that are not ancestors of common.
1065
1062
1066 More specifically, return a list of nodes N such that every N
1063 More specifically, return a list of nodes N such that every N
1067 satisfies the following constraints:
1064 satisfies the following constraints:
1068
1065
1069 1. N is an ancestor of some node in 'heads'
1066 1. N is an ancestor of some node in 'heads'
1070 2. N is not an ancestor of any node in 'common'
1067 2. N is not an ancestor of any node in 'common'
1071
1068
1072 The list is sorted by revision number, meaning it is
1069 The list is sorted by revision number, meaning it is
1073 topologically sorted.
1070 topologically sorted.
1074
1071
1075 'heads' and 'common' are both lists of node IDs. If heads is
1072 'heads' and 'common' are both lists of node IDs. If heads is
1076 not supplied, uses all of the revlog's heads. If common is not
1073 not supplied, uses all of the revlog's heads. If common is not
1077 supplied, uses nullid."""
1074 supplied, uses nullid."""
1078 if common is None:
1075 if common is None:
1079 common = [nullid]
1076 common = [nullid]
1080 if heads is None:
1077 if heads is None:
1081 heads = self.heads()
1078 heads = self.heads()
1082
1079
1083 common = [self.rev(n) for n in common]
1080 common = [self.rev(n) for n in common]
1084 heads = [self.rev(n) for n in heads]
1081 heads = [self.rev(n) for n in heads]
1085
1082
1086 inc = self.incrementalmissingrevs(common=common)
1083 inc = self.incrementalmissingrevs(common=common)
1087 return [self.node(r) for r in inc.missingancestors(heads)]
1084 return [self.node(r) for r in inc.missingancestors(heads)]
1088
1085
1089 def nodesbetween(self, roots=None, heads=None):
1086 def nodesbetween(self, roots=None, heads=None):
1090 """Return a topological path from 'roots' to 'heads'.
1087 """Return a topological path from 'roots' to 'heads'.
1091
1088
1092 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1089 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1093 topologically sorted list of all nodes N that satisfy both of
1090 topologically sorted list of all nodes N that satisfy both of
1094 these constraints:
1091 these constraints:
1095
1092
1096 1. N is a descendant of some node in 'roots'
1093 1. N is a descendant of some node in 'roots'
1097 2. N is an ancestor of some node in 'heads'
1094 2. N is an ancestor of some node in 'heads'
1098
1095
1099 Every node is considered to be both a descendant and an ancestor
1096 Every node is considered to be both a descendant and an ancestor
1100 of itself, so every reachable node in 'roots' and 'heads' will be
1097 of itself, so every reachable node in 'roots' and 'heads' will be
1101 included in 'nodes'.
1098 included in 'nodes'.
1102
1099
1103 'outroots' is the list of reachable nodes in 'roots', i.e., the
1100 'outroots' is the list of reachable nodes in 'roots', i.e., the
1104 subset of 'roots' that is returned in 'nodes'. Likewise,
1101 subset of 'roots' that is returned in 'nodes'. Likewise,
1105 'outheads' is the subset of 'heads' that is also in 'nodes'.
1102 'outheads' is the subset of 'heads' that is also in 'nodes'.
1106
1103
1107 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1104 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1108 unspecified, uses nullid as the only root. If 'heads' is
1105 unspecified, uses nullid as the only root. If 'heads' is
1109 unspecified, uses list of all of the revlog's heads."""
1106 unspecified, uses list of all of the revlog's heads."""
1110 nonodes = ([], [], [])
1107 nonodes = ([], [], [])
1111 if roots is not None:
1108 if roots is not None:
1112 roots = list(roots)
1109 roots = list(roots)
1113 if not roots:
1110 if not roots:
1114 return nonodes
1111 return nonodes
1115 lowestrev = min([self.rev(n) for n in roots])
1112 lowestrev = min([self.rev(n) for n in roots])
1116 else:
1113 else:
1117 roots = [nullid] # Everybody's a descendant of nullid
1114 roots = [nullid] # Everybody's a descendant of nullid
1118 lowestrev = nullrev
1115 lowestrev = nullrev
1119 if (lowestrev == nullrev) and (heads is None):
1116 if (lowestrev == nullrev) and (heads is None):
1120 # We want _all_ the nodes!
1117 # We want _all_ the nodes!
1121 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1118 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1122 if heads is None:
1119 if heads is None:
1123 # All nodes are ancestors, so the latest ancestor is the last
1120 # All nodes are ancestors, so the latest ancestor is the last
1124 # node.
1121 # node.
1125 highestrev = len(self) - 1
1122 highestrev = len(self) - 1
1126 # Set ancestors to None to signal that every node is an ancestor.
1123 # Set ancestors to None to signal that every node is an ancestor.
1127 ancestors = None
1124 ancestors = None
1128 # Set heads to an empty dictionary for later discovery of heads
1125 # Set heads to an empty dictionary for later discovery of heads
1129 heads = {}
1126 heads = {}
1130 else:
1127 else:
1131 heads = list(heads)
1128 heads = list(heads)
1132 if not heads:
1129 if not heads:
1133 return nonodes
1130 return nonodes
1134 ancestors = set()
1131 ancestors = set()
1135 # Turn heads into a dictionary so we can remove 'fake' heads.
1132 # Turn heads into a dictionary so we can remove 'fake' heads.
1136 # Also, later we will be using it to filter out the heads we can't
1133 # Also, later we will be using it to filter out the heads we can't
1137 # find from roots.
1134 # find from roots.
1138 heads = dict.fromkeys(heads, False)
1135 heads = dict.fromkeys(heads, False)
1139 # Start at the top and keep marking parents until we're done.
1136 # Start at the top and keep marking parents until we're done.
1140 nodestotag = set(heads)
1137 nodestotag = set(heads)
1141 # Remember where the top was so we can use it as a limit later.
1138 # Remember where the top was so we can use it as a limit later.
1142 highestrev = max([self.rev(n) for n in nodestotag])
1139 highestrev = max([self.rev(n) for n in nodestotag])
1143 while nodestotag:
1140 while nodestotag:
1144 # grab a node to tag
1141 # grab a node to tag
1145 n = nodestotag.pop()
1142 n = nodestotag.pop()
1146 # Never tag nullid
1143 # Never tag nullid
1147 if n == nullid:
1144 if n == nullid:
1148 continue
1145 continue
1149 # A node's revision number represents its place in a
1146 # A node's revision number represents its place in a
1150 # topologically sorted list of nodes.
1147 # topologically sorted list of nodes.
1151 r = self.rev(n)
1148 r = self.rev(n)
1152 if r >= lowestrev:
1149 if r >= lowestrev:
1153 if n not in ancestors:
1150 if n not in ancestors:
1154 # If we are possibly a descendant of one of the roots
1151 # If we are possibly a descendant of one of the roots
1155 # and we haven't already been marked as an ancestor
1152 # and we haven't already been marked as an ancestor
1156 ancestors.add(n) # Mark as ancestor
1153 ancestors.add(n) # Mark as ancestor
1157 # Add non-nullid parents to list of nodes to tag.
1154 # Add non-nullid parents to list of nodes to tag.
1158 nodestotag.update(
1155 nodestotag.update(
1159 [p for p in self.parents(n) if p != nullid]
1156 [p for p in self.parents(n) if p != nullid]
1160 )
1157 )
1161 elif n in heads: # We've seen it before, is it a fake head?
1158 elif n in heads: # We've seen it before, is it a fake head?
1162 # So it is; real heads should not be the ancestors of
1159 # So it is; real heads should not be the ancestors of
1163 # any other heads.
1160 # any other heads.
1164 heads.pop(n)
1161 heads.pop(n)
1165 if not ancestors:
1162 if not ancestors:
1166 return nonodes
1163 return nonodes
1167 # Now that we have our set of ancestors, we want to remove any
1164 # Now that we have our set of ancestors, we want to remove any
1168 # roots that are not ancestors.
1165 # roots that are not ancestors.
1169
1166
1170 # If one of the roots was nullid, everything is included anyway.
1167 # If one of the roots was nullid, everything is included anyway.
1171 if lowestrev > nullrev:
1168 if lowestrev > nullrev:
1172 # But, since we weren't, let's recompute the lowest rev to not
1169 # But, since we weren't, let's recompute the lowest rev to not
1173 # include roots that aren't ancestors.
1170 # include roots that aren't ancestors.
1174
1171
1175 # Filter out roots that aren't ancestors of heads
1172 # Filter out roots that aren't ancestors of heads
1176 roots = [root for root in roots if root in ancestors]
1173 roots = [root for root in roots if root in ancestors]
1177 # Recompute the lowest revision
1174 # Recompute the lowest revision
1178 if roots:
1175 if roots:
1179 lowestrev = min([self.rev(root) for root in roots])
1176 lowestrev = min([self.rev(root) for root in roots])
1180 else:
1177 else:
1181 # No more roots? Return empty list
1178 # No more roots? Return empty list
1182 return nonodes
1179 return nonodes
1183 else:
1180 else:
1184 # We are descending from nullid, and don't need to care about
1181 # We are descending from nullid, and don't need to care about
1185 # any other roots.
1182 # any other roots.
1186 lowestrev = nullrev
1183 lowestrev = nullrev
1187 roots = [nullid]
1184 roots = [nullid]
1188 # Transform our roots list into a set.
1185 # Transform our roots list into a set.
1189 descendants = set(roots)
1186 descendants = set(roots)
1190 # Also, keep the original roots so we can filter out roots that aren't
1187 # Also, keep the original roots so we can filter out roots that aren't
1191 # 'real' roots (i.e. are descended from other roots).
1188 # 'real' roots (i.e. are descended from other roots).
1192 roots = descendants.copy()
1189 roots = descendants.copy()
1193 # Our topologically sorted list of output nodes.
1190 # Our topologically sorted list of output nodes.
1194 orderedout = []
1191 orderedout = []
1195 # Don't start at nullid since we don't want nullid in our output list,
1192 # Don't start at nullid since we don't want nullid in our output list,
1196 # and if nullid shows up in descendants, empty parents will look like
1193 # and if nullid shows up in descendants, empty parents will look like
1197 # they're descendants.
1194 # they're descendants.
1198 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1195 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1199 n = self.node(r)
1196 n = self.node(r)
1200 isdescendant = False
1197 isdescendant = False
1201 if lowestrev == nullrev: # Everybody is a descendant of nullid
1198 if lowestrev == nullrev: # Everybody is a descendant of nullid
1202 isdescendant = True
1199 isdescendant = True
1203 elif n in descendants:
1200 elif n in descendants:
1204 # n is already a descendant
1201 # n is already a descendant
1205 isdescendant = True
1202 isdescendant = True
1206 # This check only needs to be done here because all the roots
1203 # This check only needs to be done here because all the roots
1207 # will start being marked as descendants before the loop.
1204 # will start being marked as descendants before the loop.
1208 if n in roots:
1205 if n in roots:
1209 # If n was a root, check if it's a 'real' root.
1206 # If n was a root, check if it's a 'real' root.
1210 p = tuple(self.parents(n))
1207 p = tuple(self.parents(n))
1211 # If any of its parents are descendants, it's not a root.
1208 # If any of its parents are descendants, it's not a root.
1212 if (p[0] in descendants) or (p[1] in descendants):
1209 if (p[0] in descendants) or (p[1] in descendants):
1213 roots.remove(n)
1210 roots.remove(n)
1214 else:
1211 else:
1215 p = tuple(self.parents(n))
1212 p = tuple(self.parents(n))
1216 # A node is a descendant if either of its parents are
1213 # A node is a descendant if either of its parents are
1217 # descendants. (We seeded the descendants set with the roots
1214 # descendants. (We seeded the descendants set with the roots
1218 # up there, remember?)
1215 # up there, remember?)
1219 if (p[0] in descendants) or (p[1] in descendants):
1216 if (p[0] in descendants) or (p[1] in descendants):
1220 descendants.add(n)
1217 descendants.add(n)
1221 isdescendant = True
1218 isdescendant = True
1222 if isdescendant and ((ancestors is None) or (n in ancestors)):
1219 if isdescendant and ((ancestors is None) or (n in ancestors)):
1223 # Only include nodes that are both descendants and ancestors.
1220 # Only include nodes that are both descendants and ancestors.
1224 orderedout.append(n)
1221 orderedout.append(n)
1225 if (ancestors is not None) and (n in heads):
1222 if (ancestors is not None) and (n in heads):
1226 # We're trying to figure out which heads are reachable
1223 # We're trying to figure out which heads are reachable
1227 # from roots.
1224 # from roots.
1228 # Mark this head as having been reached
1225 # Mark this head as having been reached
1229 heads[n] = True
1226 heads[n] = True
1230 elif ancestors is None:
1227 elif ancestors is None:
1231 # Otherwise, we're trying to discover the heads.
1228 # Otherwise, we're trying to discover the heads.
1232 # Assume this is a head because if it isn't, the next step
1229 # Assume this is a head because if it isn't, the next step
1233 # will eventually remove it.
1230 # will eventually remove it.
1234 heads[n] = True
1231 heads[n] = True
1235 # But, obviously its parents aren't.
1232 # But, obviously its parents aren't.
1236 for p in self.parents(n):
1233 for p in self.parents(n):
1237 heads.pop(p, None)
1234 heads.pop(p, None)
1238 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1235 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1239 roots = list(roots)
1236 roots = list(roots)
1240 assert orderedout
1237 assert orderedout
1241 assert roots
1238 assert roots
1242 assert heads
1239 assert heads
1243 return (orderedout, roots, heads)
1240 return (orderedout, roots, heads)
1244
1241
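# Editor's note: a pure-graph sketch of nodesbetween's contract on a
# toy DAG (names illustrative): keep the nodes that both descend from
# some root and are ancestors of some head, in topological order.
toy = {0: (), 1: (0,), 2: (1,), 3: (0,)}  # rev -> parents

def toy_between(roots, heads):
    desc = set(roots)
    for n in sorted(toy):  # revision order is topological
        if any(p in desc for p in toy[n]):
            desc.add(n)
    anc, stack = set(heads), list(heads)
    while stack:
        for p in toy[stack.pop()]:
            if p not in anc:
                anc.add(p)
                stack.append(p)
    return sorted(desc & anc)

assert toy_between({0}, {2}) == [0, 1, 2]  # rev 3 reaches no head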
1245 def headrevs(self, revs=None):
1242 def headrevs(self, revs=None):
1246 if revs is None:
1243 if revs is None:
1247 try:
1244 try:
1248 return self.index.headrevs()
1245 return self.index.headrevs()
1249 except AttributeError:
1246 except AttributeError:
1250 return self._headrevs()
1247 return self._headrevs()
1251 if rustdagop is not None:
1248 if rustdagop is not None:
1252 return rustdagop.headrevs(self.index, revs)
1249 return rustdagop.headrevs(self.index, revs)
1253 return dagop.headrevs(revs, self._uncheckedparentrevs)
1250 return dagop.headrevs(revs, self._uncheckedparentrevs)
1254
1251
1255 def computephases(self, roots):
1252 def computephases(self, roots):
1256 return self.index.computephasesmapsets(roots)
1253 return self.index.computephasesmapsets(roots)
1257
1254
1258 def _headrevs(self):
1255 def _headrevs(self):
1259 count = len(self)
1256 count = len(self)
1260 if not count:
1257 if not count:
1261 return [nullrev]
1258 return [nullrev]
1262 # we won't iterate over filtered revs, so nobody is a head at the start
1259 # we won't iterate over filtered revs, so nobody is a head at the start
1263 ishead = [0] * (count + 1)
1260 ishead = [0] * (count + 1)
1264 index = self.index
1261 index = self.index
1265 for r in self:
1262 for r in self:
1266 ishead[r] = 1 # I may be a head
1263 ishead[r] = 1 # I may be a head
1267 e = index[r]
1264 e = index[r]
1268 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1265 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1269 return [r for r, val in enumerate(ishead) if val]
1266 return [r for r, val in enumerate(ishead) if val]
1270
1267
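# Editor's note: the same mark-then-clear idea as _headrevs above, on a
# toy parent list. The extra trailing slot absorbs writes for nullrev
# (-1), exactly like the count + 1 sizing above.
toy_parentrevs = [(-1, -1), (0, -1), (1, -1), (1, -1)]  # 1 has two kids
ishead = [1] * len(toy_parentrevs) + [0]
for p1, p2 in toy_parentrevs:
    ishead[p1] = ishead[p2] = 0  # a parent is never a head
assert [r for r, v in enumerate(ishead) if v] == [2, 3]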
1271 def heads(self, start=None, stop=None):
1268 def heads(self, start=None, stop=None):
1272 """return the list of all nodes that have no children
1269 """return the list of all nodes that have no children
1273
1270
1274 if start is specified, only heads that are descendants of
1271 if start is specified, only heads that are descendants of
1275 start will be returned
1272 start will be returned
1276 if stop is specified, it will consider all the revs from stop
1273 if stop is specified, it will consider all the revs from stop
1277 as if they had no children
1274 as if they had no children
1278 """
1275 """
1279 if start is None and stop is None:
1276 if start is None and stop is None:
1280 if not len(self):
1277 if not len(self):
1281 return [nullid]
1278 return [nullid]
1282 return [self.node(r) for r in self.headrevs()]
1279 return [self.node(r) for r in self.headrevs()]
1283
1280
1284 if start is None:
1281 if start is None:
1285 start = nullrev
1282 start = nullrev
1286 else:
1283 else:
1287 start = self.rev(start)
1284 start = self.rev(start)
1288
1285
1289 stoprevs = {self.rev(n) for n in stop or []}
1286 stoprevs = {self.rev(n) for n in stop or []}
1290
1287
1291 revs = dagop.headrevssubset(
1288 revs = dagop.headrevssubset(
1292 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1289 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1293 )
1290 )
1294
1291
1295 return [self.node(rev) for rev in revs]
1292 return [self.node(rev) for rev in revs]
1296
1293
1297 def children(self, node):
1294 def children(self, node):
1298 """find the children of a given node"""
1295 """find the children of a given node"""
1299 c = []
1296 c = []
1300 p = self.rev(node)
1297 p = self.rev(node)
1301 for r in self.revs(start=p + 1):
1298 for r in self.revs(start=p + 1):
1302 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1299 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1303 if prevs:
1300 if prevs:
1304 for pr in prevs:
1301 for pr in prevs:
1305 if pr == p:
1302 if pr == p:
1306 c.append(self.node(r))
1303 c.append(self.node(r))
1307 elif p == nullrev:
1304 elif p == nullrev:
1308 c.append(self.node(r))
1305 c.append(self.node(r))
1309 return c
1306 return c
1310
1307
1311 def commonancestorsheads(self, a, b):
1308 def commonancestorsheads(self, a, b):
1312 """calculate all the heads of the common ancestors of nodes a and b"""
1309 """calculate all the heads of the common ancestors of nodes a and b"""
1313 a, b = self.rev(a), self.rev(b)
1310 a, b = self.rev(a), self.rev(b)
1314 ancs = self._commonancestorsheads(a, b)
1311 ancs = self._commonancestorsheads(a, b)
1315 return pycompat.maplist(self.node, ancs)
1312 return pycompat.maplist(self.node, ancs)
1316
1313
1317 def _commonancestorsheads(self, *revs):
1314 def _commonancestorsheads(self, *revs):
1318 """calculate all the heads of the common ancestors of revs"""
1315 """calculate all the heads of the common ancestors of revs"""
1319 try:
1316 try:
1320 ancs = self.index.commonancestorsheads(*revs)
1317 ancs = self.index.commonancestorsheads(*revs)
1321 except (AttributeError, OverflowError): # C implementation failed
1318 except (AttributeError, OverflowError): # C implementation failed
1322 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1319 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1323 return ancs
1320 return ancs
1324
1321
1325 def isancestor(self, a, b):
1322 def isancestor(self, a, b):
1326 """return True if node a is an ancestor of node b
1323 """return True if node a is an ancestor of node b
1327
1324
1328 A revision is considered an ancestor of itself."""
1325 A revision is considered an ancestor of itself."""
1329 a, b = self.rev(a), self.rev(b)
1326 a, b = self.rev(a), self.rev(b)
1330 return self.isancestorrev(a, b)
1327 return self.isancestorrev(a, b)
1331
1328
1332 def isancestorrev(self, a, b):
1329 def isancestorrev(self, a, b):
1333 """return True if revision a is an ancestor of revision b
1330 """return True if revision a is an ancestor of revision b
1334
1331
1335 A revision is considered an ancestor of itself.
1332 A revision is considered an ancestor of itself.
1336
1333
1337 The implementation of this is trivial but the use of
1334 The implementation of this is trivial but the use of
1338 reachableroots is not."""
1335 reachableroots is not."""
1339 if a == nullrev:
1336 if a == nullrev:
1340 return True
1337 return True
1341 elif a == b:
1338 elif a == b:
1342 return True
1339 return True
1343 elif a > b:
1340 elif a > b:
1344 return False
1341 return False
1345 return bool(self.reachableroots(a, [b], [a], includepath=False))
1342 return bool(self.reachableroots(a, [b], [a], includepath=False))
1346
1343
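# Editor's note: why the reachableroots call above answers the
# question: with roots=[a], heads=[b], minroot=a it is non-empty
# exactly when walking b's ancestry (pruned below rev a) reaches a.
# A toy equivalent over an illustrative parent map:
anc_parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}

def toy_isancestorrev(a, b):
    stack = [b]
    while stack:
        r = stack.pop()
        if r == a:
            return True
        stack.extend(p for p in anc_parents[r] if p >= a)
    return False

assert toy_isancestorrev(0, 2) and not toy_isancestorrev(2, 0)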
1347 def reachableroots(self, minroot, heads, roots, includepath=False):
1344 def reachableroots(self, minroot, heads, roots, includepath=False):
1348 """return (heads(::(<roots> and <roots>::<heads>)))
1345 """return (heads(::(<roots> and <roots>::<heads>)))
1349
1346
1350 If includepath is True, return (<roots>::<heads>)."""
1347 If includepath is True, return (<roots>::<heads>)."""
1351 try:
1348 try:
1352 return self.index.reachableroots2(
1349 return self.index.reachableroots2(
1353 minroot, heads, roots, includepath
1350 minroot, heads, roots, includepath
1354 )
1351 )
1355 except AttributeError:
1352 except AttributeError:
1356 return dagop._reachablerootspure(
1353 return dagop._reachablerootspure(
1357 self.parentrevs, minroot, roots, heads, includepath
1354 self.parentrevs, minroot, roots, heads, includepath
1358 )
1355 )
1359
1356
1360 def ancestor(self, a, b):
1357 def ancestor(self, a, b):
1361 """calculate the "best" common ancestor of nodes a and b"""
1358 """calculate the "best" common ancestor of nodes a and b"""
1362
1359
1363 a, b = self.rev(a), self.rev(b)
1360 a, b = self.rev(a), self.rev(b)
1364 try:
1361 try:
1365 ancs = self.index.ancestors(a, b)
1362 ancs = self.index.ancestors(a, b)
1366 except (AttributeError, OverflowError):
1363 except (AttributeError, OverflowError):
1367 ancs = ancestor.ancestors(self.parentrevs, a, b)
1364 ancs = ancestor.ancestors(self.parentrevs, a, b)
1368 if ancs:
1365 if ancs:
1369 # choose a consistent winner when there's a tie
1366 # choose a consistent winner when there's a tie
1370 return min(map(self.node, ancs))
1367 return min(map(self.node, ancs))
1371 return nullid
1368 return nullid
1372
1369
1373 def _match(self, id):
1370 def _match(self, id):
1374 if isinstance(id, int):
1371 if isinstance(id, int):
1375 # rev
1372 # rev
1376 return self.node(id)
1373 return self.node(id)
1377 if len(id) == 20:
1374 if len(id) == 20:
1378 # possibly a binary node
1375 # possibly a binary node
1379 # odds of a binary node being all hex in ASCII are 1 in 10**25
1376 # odds of a binary node being all hex in ASCII are 1 in 10**25
1380 try:
1377 try:
1381 node = id
1378 node = id
1382 self.rev(node) # quick search of the index
1379 self.rev(node) # quick search of the index
1383 return node
1380 return node
1384 except error.LookupError:
1381 except error.LookupError:
1385 pass # may be partial hex id
1382 pass # may be partial hex id
1386 try:
1383 try:
1387 # str(rev)
1384 # str(rev)
1388 rev = int(id)
1385 rev = int(id)
1389 if b"%d" % rev != id:
1386 if b"%d" % rev != id:
1390 raise ValueError
1387 raise ValueError
1391 if rev < 0:
1388 if rev < 0:
1392 rev = len(self) + rev
1389 rev = len(self) + rev
1393 if rev < 0 or rev >= len(self):
1390 if rev < 0 or rev >= len(self):
1394 raise ValueError
1391 raise ValueError
1395 return self.node(rev)
1392 return self.node(rev)
1396 except (ValueError, OverflowError):
1393 except (ValueError, OverflowError):
1397 pass
1394 pass
1398 if len(id) == 40:
1395 if len(id) == 40:
1399 try:
1396 try:
1400 # a full hex nodeid?
1397 # a full hex nodeid?
1401 node = bin(id)
1398 node = bin(id)
1402 self.rev(node)
1399 self.rev(node)
1403 return node
1400 return node
1404 except (TypeError, error.LookupError):
1401 except (TypeError, error.LookupError):
1405 pass
1402 pass
1406
1403
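# Editor's note: the str(rev) branch above, restated as a standalone
# helper (illustrative name). The round-trip check rejects
# non-canonical spellings like b"05", and negative values count back
# from the tip.
def toy_normalize_rev(id_bytes, nrevs):
    rev = int(id_bytes)
    if b"%d" % rev != id_bytes:
        raise ValueError(id_bytes)
    if rev < 0:
        rev += nrevs
    if not 0 <= rev < nrevs:
        raise ValueError(id_bytes)
    return rev

assert toy_normalize_rev(b"-1", 10) == 9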
1407 def _partialmatch(self, id):
1404 def _partialmatch(self, id):
1408 # we don't care about wdirfilenodeids as they should always be full hashes
1405 # we don't care about wdirfilenodeids as they should always be full hashes
1409 maybewdir = wdirhex.startswith(id)
1406 maybewdir = wdirhex.startswith(id)
1410 try:
1407 try:
1411 partial = self.index.partialmatch(id)
1408 partial = self.index.partialmatch(id)
1412 if partial and self.hasnode(partial):
1409 if partial and self.hasnode(partial):
1413 if maybewdir:
1410 if maybewdir:
1414 # single 'ff...' match in radix tree, ambiguous with wdir
1411 # single 'ff...' match in radix tree, ambiguous with wdir
1415 raise error.RevlogError
1412 raise error.RevlogError
1416 return partial
1413 return partial
1417 if maybewdir:
1414 if maybewdir:
1418 # no 'ff...' match in radix tree, wdir identified
1415 # no 'ff...' match in radix tree, wdir identified
1419 raise error.WdirUnsupported
1416 raise error.WdirUnsupported
1420 return None
1417 return None
1421 except error.RevlogError:
1418 except error.RevlogError:
1422 # parsers.c radix tree lookup gave multiple matches
1419 # parsers.c radix tree lookup gave multiple matches
1423 # fast path: for unfiltered changelog, radix tree is accurate
1420 # fast path: for unfiltered changelog, radix tree is accurate
1424 if not getattr(self, 'filteredrevs', None):
1421 if not getattr(self, 'filteredrevs', None):
1425 raise error.AmbiguousPrefixLookupError(
1422 raise error.AmbiguousPrefixLookupError(
1426 id, self.indexfile, _(b'ambiguous identifier')
1423 id, self.indexfile, _(b'ambiguous identifier')
1427 )
1424 )
1428 # fall through to slow path that filters hidden revisions
1425 # fall through to slow path that filters hidden revisions
1429 except (AttributeError, ValueError):
1426 except (AttributeError, ValueError):
1430 # we are pure python, or key was too short to search radix tree
1427 # we are pure python, or key was too short to search radix tree
1431 pass
1428 pass
1432
1429
1433 if id in self._pcache:
1430 if id in self._pcache:
1434 return self._pcache[id]
1431 return self._pcache[id]
1435
1432
1436 if len(id) <= 40:
1433 if len(id) <= 40:
1437 try:
1434 try:
1438 # hex(node)[:...]
1435 # hex(node)[:...]
1439 l = len(id) // 2 # grab an even number of digits
1436 l = len(id) // 2 # grab an even number of digits
1440 prefix = bin(id[: l * 2])
1437 prefix = bin(id[: l * 2])
1441 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1438 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1442 nl = [
1439 nl = [
1443 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1440 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1444 ]
1441 ]
1445 if nullhex.startswith(id):
1442 if nullhex.startswith(id):
1446 nl.append(nullid)
1443 nl.append(nullid)
1447 if len(nl) > 0:
1444 if len(nl) > 0:
1448 if len(nl) == 1 and not maybewdir:
1445 if len(nl) == 1 and not maybewdir:
1449 self._pcache[id] = nl[0]
1446 self._pcache[id] = nl[0]
1450 return nl[0]
1447 return nl[0]
1451 raise error.AmbiguousPrefixLookupError(
1448 raise error.AmbiguousPrefixLookupError(
1452 id, self.indexfile, _(b'ambiguous identifier')
1449 id, self.indexfile, _(b'ambiguous identifier')
1453 )
1450 )
1454 if maybewdir:
1451 if maybewdir:
1455 raise error.WdirUnsupported
1452 raise error.WdirUnsupported
1456 return None
1453 return None
1457 except TypeError:
1454 except TypeError:
1458 pass
1455 pass
1459
1456
1460 def lookup(self, id):
1457 def lookup(self, id):
1461 """locate a node based on:
1458 """locate a node based on:
1462 - revision number or str(revision number)
1459 - revision number or str(revision number)
1463 - nodeid or subset of hex nodeid
1460 - nodeid or subset of hex nodeid
1464 """
1461 """
1465 n = self._match(id)
1462 n = self._match(id)
1466 if n is not None:
1463 if n is not None:
1467 return n
1464 return n
1468 n = self._partialmatch(id)
1465 n = self._partialmatch(id)
1469 if n:
1466 if n:
1470 return n
1467 return n
1471
1468
1472 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1469 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1473
1470
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

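# Illustrative sketch (standalone, not part of the class above): the
# shortest-prefix search tries increasing lengths and treats a prefix as
# ambiguous when it matches more than one node, with all-'f' prefixes
# reserved for the virtual working-directory id. A minimal model of that
# idea over a plain set of hex ids (names here are hypothetical, not
# Mercurial APIs):

def shortest_unique_prefix(hexnode, all_hexnodes, minlength=1):
    """Return the shortest prefix of hexnode unique within all_hexnodes."""
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [n for n in all_hexnodes if n.startswith(prefix)]
        # skip prefixes that could still mean the working-directory id
        if len(matches) == 1 and not all(c == 'f' for c in prefix):
            return prefix
    return hexnode

# e.g. shortest_unique_prefix('abc123', {'abc123', 'abd456'}) == 'abc'
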
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

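# Illustrative sketch (standalone, not Mercurial API): the window math in
# _readsegment relies on the chunk cache size being a power of two, so
# that `offset & ~(cachesize - 1)` rounds down to a window boundary and
# the read is widened to whole windows around the requested span.

def aligned_window(offset, length, cachesize=65536):
    """Round (offset, length) out to cachesize-aligned boundaries."""
    assert cachesize & (cachesize - 1) == 0, 'cachesize must be a power of 2'
    realoffset = offset & ~(cachesize - 1)  # round the start down
    realend = (offset + length + cachesize) & ~(cachesize - 1)  # round up
    return realoffset, realend - realoffset

# e.g. aligned_window(70000, 100) == (65536, 65536): a 100-byte request
# near the window boundary is served by reading one full aligned window.
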
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

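# Illustrative sketch: in an inline revlog the data chunks are interleaved
# with the fixed-size index entries in a single file, so the logical data
# offset of revision r must be shifted by (r + 1) index entries to get the
# physical file offset. A standalone model of that translation (names and
# the 64-byte entry size are hypothetical):

def physical_offset(logical_start, rev, inline, entry_size=64):
    """Translate a logical data offset into a file offset."""
    if inline:
        # every revision up to and including `rev` contributes one index
        # entry before this revision's data chunk
        return logical_start + (rev + 1) * entry_size
    return logical_start  # separate .d file: offsets are already physical

# e.g. revision 2 whose data logically starts at byte 100:
# physical_offset(100, 2, inline=True) == 100 + 3 * 64 == 292
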
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

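# Illustrative sketch: the index stores a "base" field per revision. A
# revision whose base is itself is a full snapshot (delta parent nullrev).
# With general delta the base field is the delta parent; in the legacy
# layout deltas always chain against the previous revision. A standalone
# model over a toy index of base fields (hypothetical structure):

NULLREV = -1

def delta_parent(index, rev, generaldelta):
    """Mirror of the base-field interpretation used above."""
    base = index[rev]  # toy index: just the base field per revision
    if base == rev:
        return NULLREV  # full snapshot, no delta parent
    if generaldelta:
        return base  # base field names the delta parent directly
    return rev - 1  # legacy: always delta against the previous revision

# e.g. with index = [0, 0, 2, 2]: rev 1 deltas against rev 0, rev 2 is a
# full snapshot, and rev 3 deltas against rev 2 in either layout.
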
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

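# Illustrative sketch: under sparse-revlog, "snapshot" is recursive. A
# revision is a snapshot if it is stored in full, or if its delta base is
# not one of its parents (an intermediate snapshot candidate) and the base
# itself is a snapshot. A standalone model over a toy index of
# (base, p1, p2) tuples (hypothetical structure):

NULLREV = -1

def is_snapshot(index, rev):
    if rev == NULLREV:
        return True
    base, p1, p2 = index[rev]
    if base == rev or base == NULLREV:
        return True  # stored in full
    if base in (p1, p2):
        return False  # ordinary delta against a parent
    return is_snapshot(index, base)  # recurse into the base

# index = {0: (0, NULLREV, NULLREV),   # full snapshot
#          1: (0, 0, NULLREV),         # delta against its parent: not one
#          2: (0, 1, NULLREV)}         # delta against non-parent rev 0
# is_snapshot(index, 2) is True only because rev 0 is itself a snapshot.
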
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

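# Illustrative sketch: reconstructing a revision walks its delta chain,
# starting from the chain's full snapshot and applying each delta in
# order (the real code batches this through mdiff.patches). A standalone
# model with the patching step abstracted away (hypothetical names, far
# simpler than Mercurial's binary patch format):

def reconstruct(snapshot, deltas, apply_delta):
    """Apply deltas in chain order on top of the base snapshot."""
    text = snapshot
    for delta in deltas:
        text = apply_delta(text, delta)
    return text

# With real revlog deltas, apply_delta would be mdiff-style binary
# patching; any callable of the same shape works here, e.g.:
# reconstruct(b'v1', [b'v2', b'v3'], lambda old, new: new) == b'v3'
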
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

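# Illustrative sketch: storageutil.hashrevisionsha1 derives the node id
# from the revision text and both parent ids; to my understanding the two
# parents are concatenated in sorted order before the text, so parent
# order does not change the hash. A standalone approximation:

import hashlib

def hashrevision_sketch(text, p1, p2):
    """SHA-1 over the sorted parents followed by the revision text."""
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

# This is why cmp() above can detect changed text without reading the
# stored revision: recomputing the hash and comparing it to the node id
# is enough.
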
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

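# Illustrative sketch: the conversion above streams every revision's raw
# chunk out of the combined inline file into a fresh .d data file, then
# rewrites the index entries with the inline flag cleared. A standalone
# model over a toy inline file where each record is a fixed-size index
# entry immediately followed by its data chunk (hypothetical layout):

def split_inline(inline_records):
    """inline_records: list of (index_entry_bytes, data_chunk_bytes).

    Returns (index_bytes, data_bytes) for the separate-file layout.
    """
    index_parts = []
    data_parts = []
    for entry, chunk in inline_records:
        index_parts.append(entry)  # entries become the .i file
        data_parts.append(chunk)  # chunks concatenate into the .d file
    return b''.join(index_parts), b''.join(data_parts)

# After the split, data offsets no longer need the (rev + 1) * entry-size
# shift that _getsegmentforrevs applies for inline revlogs.
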
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if self.index.has_node(node):
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

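# Illustrative sketch: the one-byte header dispatch works because each
# storage form is self-identifying. zlib streams produced with default
# settings start with 0x78 ('x'), uncompressed chunks are explicitly
# prefixed with 'u' (or start with '\0' and are stored as-is), and other
# engines register their own header byte. A quick standalone check of
# the zlib case:

import zlib

payload = b'some revision text' * 10
compressed = zlib.compress(payload)
assert compressed[0:1] == b'x'  # the header byte decompress() switches on
assert zlib.decompress(compressed) == payload

# An uncompressed chunk would instead be stored as b'u' + payload, and
# stripping that single byte is all the "decompression" it needs.
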
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

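# Illustrative sketch: the first field of each index entry packs the data
# offset and the revision flags into one integer, offset in the high bits
# and 16 bits of flags in the low bits, which is why readers above
# recover the offset with `entry[0] >> 16`. Standalone pack/unpack
# helpers modeling that layout (hypothetical names):

def pack_offset_flags(offset, flags):
    assert 0 <= flags < (1 << 16), 'flags must fit in 16 bits'
    return (offset << 16) | flags

def unpack_offset_flags(field):
    return field >> 16, field & 0xFFFF

assert unpack_offset_flags(pack_offset_flags(1234, 0b10)) == (1234, 0b10)
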
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

2339 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2336 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2340 """
2337 """
2341 add a delta group
2338 add a delta group
2342
2339
2343 given a set of deltas, add them to the revision log. the
2340 given a set of deltas, add them to the revision log. the
2344 first delta is against its parent, which should be in our
2341 first delta is against its parent, which should be in our
2345 log, the rest are against the previous delta.
2342 log, the rest are against the previous delta.
2346
2343
2347 If ``addrevisioncb`` is defined, it will be called with arguments of
2344 If ``addrevisioncb`` is defined, it will be called with arguments of
2348 this revlog and the node that was added.
2345 this revlog and the node that was added.
2349 """
2346 """
2350
2347
2351 if self._writinghandles:
2348 if self._writinghandles:
2352 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2349 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2353
2350
2354 nodes = []
2351 nodes = []
2355
2352
2356 r = len(self)
2353 r = len(self)
2357 end = 0
2354 end = 0
2358 if r:
2355 if r:
2359 end = self.end(r - 1)
2356 end = self.end(r - 1)
2360 ifh = self._indexfp(b"a+")
2357 ifh = self._indexfp(b"a+")
2361 isize = r * self._io.size
2358 isize = r * self._io.size
2362 if self._inline:
2359 if self._inline:
2363 transaction.add(self.indexfile, end + isize, r)
2360 transaction.add(self.indexfile, end + isize, r)
2364 dfh = None
2361 dfh = None
2365 else:
2362 else:
2366 transaction.add(self.indexfile, isize, r)
2363 transaction.add(self.indexfile, isize, r)
2367 transaction.add(self.datafile, end)
2364 transaction.add(self.datafile, end)
2368 dfh = self._datafp(b"a+")
2365 dfh = self._datafp(b"a+")
2369
2366
2370 def flush():
2367 def flush():
2371 if dfh:
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if self.index.has_node(node):
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

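    # Illustrative sketch, not from the original module: each entry consumed
    # by the addgroup() loop above is a 7-tuple. A hypothetical caller might
    # build the iterable like this (`rl`, `linkmapper` and `transaction` are
    # assumed stand-ins for a revlog, a linkrev-mapping callable, and an open
    # transaction):
    #
    #     deltas = [
    #         (node, p1, p2, linknode, deltabase, delta, flags),
    #     ]
    #     seen = rl.addgroup(deltas, linkmapper, transaction)
    #
    # addgroup() returns every node it saw, including duplicates that were
    # skipped via _nodeduplicatecallback().
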
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()

        del self.index[rev:-1]

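    # A worked sketch of the truncation arithmetic above (values assumed for
    # illustration): with fixed-size index entries of s = self._io.size
    # bytes, a non-inline revlog stores only entries in its index file, so
    # stripping at `rev` truncates the index to rev * s bytes (e.g. 640
    # bytes for s == 64, rev == 10). An inline revlog interleaves revision
    # data after each entry, so its index is truncated to
    # self.start(rev) + rev * s: the data bytes preceding `rev` plus the
    # `rev` index entries in front of them.
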
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

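    # Minimal usage sketch (the caller name `rl` is an assumption):
    #
    #     dd, di = rl.checksize()
    #     if (dd, di) != (0, 0):
    #         print('data off by %d bytes, index off by %d bytes' % (dd, di))
    #
    # verifyintegrity() below reports these same two quantities as
    # revlogproblem instances.
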
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

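    # Illustrative sketch (assumed caller): emitrevisions() returns an
    # iterator of revision delta objects in the requested order, e.g.
    #
    #     for delta in rl.emitrevisions(
    #         nodes, nodesorder=b'storage', revisiondata=True
    #     ):
    #         consume(delta)
    #
    # where `rl`, `nodes` and `consume` stand in for a revlog, an iterable
    # of node ids it contains, and caller-side handling.
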
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If None, the destination revlog's existing setting is
        kept.

        If not None, ``sidedatacompanion`` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

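    # Hedged usage sketch; `src`, `dst`, `tr` and `drop_companion` are
    # assumed names, not part of the original file. Recomputing every delta
    # while dropping one sidedata key during a clone could look like:
    #
    #     def drop_companion(srcrevlog, rev):
    #         # (dropall, filterout, update), per the clone() docstring
    #         return (False, {b'some-key'}, {})
    #
    #     src.clone(
    #         tr,
    #         dst,
    #         deltareuse=src.DELTAREUSENEVER,
    #         sidedatacompanion=drop_companion,
    #     )
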
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

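    # Sketch of the tombstone constraint above (the reason text is an
    # assumed example): the tombstone is packed under the b'censored'
    # metadata key, e.g.
    #
    #     tombstone = storageutil.packmeta({b'censored': b'legal request'}, b'')
    #
    # and censorrevision() aborts unless len(tombstone) is no larger than
    # rawsize(censorrev), because the rewritten revision must not grow.
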
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                    state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

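    # Minimal consumer sketch (`rl` and `report` are assumed names; the
    # `state` keys shown are the ones this method reads):
    #
    #     state = {b'expectedversion': 1, b'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         report(problem.warning or problem.error)
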
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
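
    # Usage sketch (the caller name `rl` is an assumption): each keyword
    # argument opts into one key of the returned dict, e.g.
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     info[b'revisionscount']  # number of revisions in this revlog
    #     info[b'storedsize']      # on-disk bytes across files()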