rust: hooking into Python code...
Georges Racinet
r40334:9cadb0f5 default

mercurial/ancestor.py @@ -1,381 +1,406

# ancestor.py - generic DAG ancestor algorithm for mercurial
#
# Copyright 2006 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import heapq

from .node import nullrev
from . import (
    policy,
    pycompat,
)

parsers = policy.importmod(r'parsers')

def commonancestorsheads(pfunc, *nodes):
    """Returns a set with the heads of all common ancestors of all nodes,
    heads(::nodes[0] and ::nodes[1] and ...) .

    pfunc must return a list of parent vertices for a given vertex.
    """
    if not isinstance(nodes, set):
        nodes = set(nodes)
    if nullrev in nodes:
        return set()
    if len(nodes) <= 1:
        return nodes

    allseen = (1 << len(nodes)) - 1
    seen = [0] * (max(nodes) + 1)
    for i, n in enumerate(nodes):
        seen[n] = 1 << i
    poison = 1 << (i + 1)

    gca = set()
    interesting = len(nodes)
    nv = len(seen) - 1
    while nv >= 0 and interesting:
        v = nv
        nv -= 1
        if not seen[v]:
            continue
        sv = seen[v]
        if sv < poison:
            interesting -= 1
            if sv == allseen:
                gca.add(v)
                sv |= poison
                if v in nodes:
                    # history is linear
                    return {v}
        if sv < poison:
            for p in pfunc(v):
                sp = seen[p]
                if p == nullrev:
                    continue
                if sp == 0:
                    seen[p] = sv
                    interesting += 1
                elif sp != sv:
                    seen[p] |= sv
        else:
            for p in pfunc(v):
                if p == nullrev:
                    continue
                sp = seen[p]
                if sp and sp < poison:
                    interesting -= 1
                seen[p] = sv
    return gca
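
For illustration (not part of ancestor.py): a toy parent map shows how pfunc
is consumed; the revision numbers are made up and nullrev (-1) stands in for
a missing parent.

# Toy DAG: 0 is the root; 1 and 2 branch off it; 3 descends from 1, 4 from 2.
parents = {0: [nullrev], 1: [0], 2: [0], 3: [1], 4: [2]}
# The only common ancestor of 3 and 4 is the root, so it is the single head.
assert commonancestorsheads(parents.__getitem__, 3, 4) == {0}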

def ancestors(pfunc, *orignodes):
    """
    Returns the common ancestors of a and b that are furthest from a
    root (as measured by longest path).

    pfunc must return a list of parent vertices for a given vertex.
    """
    def deepest(nodes):
        interesting = {}
        count = max(nodes) + 1
        depth = [0] * count
        seen = [0] * count
        mapping = []
        for (i, n) in enumerate(sorted(nodes)):
            depth[n] = 1
            b = 1 << i
            seen[n] = b
            interesting[b] = 1
            mapping.append((b, n))
        nv = count - 1
        while nv >= 0 and len(interesting) > 1:
            v = nv
            nv -= 1
            dv = depth[v]
            if dv == 0:
                continue
            sv = seen[v]
            for p in pfunc(v):
                if p == nullrev:
                    continue
                dp = depth[p]
                nsp = sp = seen[p]
                if dp <= dv:
                    depth[p] = dv + 1
                    if sp != sv:
                        interesting[sv] += 1
                        nsp = seen[p] = sv
                        if sp:
                            interesting[sp] -= 1
                            if interesting[sp] == 0:
                                del interesting[sp]
                elif dv == dp - 1:
                    nsp = sp | sv
                    if nsp == sp:
                        continue
                    seen[p] = nsp
                    interesting.setdefault(nsp, 0)
                    interesting[nsp] += 1
                    interesting[sp] -= 1
                    if interesting[sp] == 0:
                        del interesting[sp]
            interesting[sv] -= 1
            if interesting[sv] == 0:
                del interesting[sv]

        if len(interesting) != 1:
            return []

        k = 0
        for i in interesting:
            k |= i
        return set(n for (i, n) in mapping if k & i)

    gca = commonancestorsheads(pfunc, *orignodes)

    if len(gca) <= 1:
        return gca
    return deepest(gca)
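
Again illustration only, not part of the file: when several common-ancestor
heads exist, ancestors() keeps just the deepest one, while
commonancestorsheads() returns them all. The DAG below is hypothetical.

# 2 sits three steps from the root, 3 only two; 4 and 5 both merge 2 and 3.
parents = {0: [nullrev], 1: [0], 2: [1], 3: [0], 4: [2, 3], 5: [2, 3]}
assert commonancestorsheads(parents.__getitem__, 4, 5) == {2, 3}
assert ancestors(parents.__getitem__, 4, 5) == {2}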

class incrementalmissingancestors(object):
    '''persistent state used to calculate missing ancestors incrementally

    Although similar in spirit to lazyancestors below, this is a separate class
    because trying to support contains and missingancestors operations with the
    same internal data structures adds needless complexity.'''
    def __init__(self, pfunc, bases):
        self.bases = set(bases)
        if not self.bases:
            self.bases.add(nullrev)
        self.pfunc = pfunc

    def hasbases(self):
        '''whether the common set has any non-trivial bases'''
        return self.bases and self.bases != {nullrev}

    def addbases(self, newbases):
        '''grow the ancestor set by adding new bases'''
        self.bases.update(newbases)

    def removeancestorsfrom(self, revs):
        '''remove all ancestors of bases from the set revs (in place)'''
        bases = self.bases
        pfunc = self.pfunc
        revs.difference_update(bases)
        # nullrev is always an ancestor
        revs.discard(nullrev)
        if not revs:
            return
        # anything in revs > start is definitely not an ancestor of bases
        # revs <= start needs to be investigated
        start = max(bases)
        keepcount = sum(1 for r in revs if r > start)
        if len(revs) == keepcount:
            # no revs to consider
            return

        for curr in pycompat.xrange(start, min(revs) - 1, -1):
            if curr not in bases:
                continue
            revs.discard(curr)
            bases.update(pfunc(curr))
            if len(revs) == keepcount:
                # no more potential revs to discard
                break

    def missingancestors(self, revs):
        '''return all the ancestors of revs that are not ancestors of self.bases

        This may include elements from revs.

        Equivalent to the revset (::revs - ::self.bases). Revs are returned in
        revision number order, which is a topological order.'''
        revsvisit = set(revs)
        basesvisit = self.bases
        pfunc = self.pfunc
        bothvisit = revsvisit.intersection(basesvisit)
        revsvisit.difference_update(bothvisit)
        if not revsvisit:
            return []

        start = max(max(revsvisit), max(basesvisit))
        # At this point, we hold the invariants that:
        # - revsvisit is the set of nodes we know are an ancestor of at least
        #   one of the nodes in revs
        # - basesvisit is the same for bases
        # - bothvisit is the set of nodes we know are ancestors of at least one
        #   of the nodes in revs and one of the nodes in bases. bothvisit and
        #   revsvisit are mutually exclusive, but bothvisit is a subset of
        #   basesvisit.
        # Now we walk down in reverse topo order, adding parents of nodes
        # already visited to the sets while maintaining the invariants. When a
        # node is found in both revsvisit and basesvisit, it is removed from
        # revsvisit and added to bothvisit. When revsvisit becomes empty, there
        # are no more ancestors of revs that aren't also ancestors of bases, so
        # exit.

        missing = []
        for curr in pycompat.xrange(start, nullrev, -1):
            if not revsvisit:
                break

            if curr in bothvisit:
                bothvisit.remove(curr)
                # curr's parents might have made it into revsvisit through
                # another path
                for p in pfunc(curr):
                    revsvisit.discard(p)
                    basesvisit.add(p)
                    bothvisit.add(p)
                continue

            if curr in revsvisit:
                missing.append(curr)
                revsvisit.remove(curr)
                thisvisit = revsvisit
                othervisit = basesvisit
            elif curr in basesvisit:
                thisvisit = basesvisit
                othervisit = revsvisit
            else:
                # not an ancestor of revs or bases: ignore
                continue

            for p in pfunc(curr):
                if p == nullrev:
                    pass
                elif p in othervisit or p in bothvisit:
                    # p is implicitly in thisvisit. This means p is or should be
                    # in bothvisit
                    revsvisit.discard(p)
                    basesvisit.add(p)
                    bothvisit.add(p)
                else:
                    # visit later
                    thisvisit.add(p)

        missing.reverse()
        return missing
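
A usage sketch (hypothetical history that forks at revision 1), not part of
the file:

parents = {0: [nullrev], 1: [0], 2: [1], 3: [1], 4: [3]}
missing = incrementalmissingancestors(parents.__getitem__, [2])
# ::4 - ::2 is {3, 4}, returned in revision number (topological) order.
assert missing.missingancestors([4]) == [3, 4]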

# Extracted from lazyancestors.__iter__ to avoid a reference cycle
def _lazyancestorsiter(parentrevs, initrevs, stoprev, inclusive):
    seen = {nullrev}
    heappush = heapq.heappush
    heappop = heapq.heappop
    heapreplace = heapq.heapreplace
    see = seen.add

    if inclusive:
        visit = [-r for r in initrevs]
        seen.update(initrevs)
        heapq.heapify(visit)
    else:
        visit = []
        heapq.heapify(visit)
        for r in initrevs:
            p1, p2 = parentrevs(r)
            if p1 not in seen:
                heappush(visit, -p1)
                see(p1)
            if p2 not in seen:
                heappush(visit, -p2)
                see(p2)

    while visit:
        current = -visit[0]
        if current < stoprev:
            break
        yield current
        # optimize out heapq operation if p1 is known to be the next highest
        # revision, which is quite common in linear history.
        p1, p2 = parentrevs(current)
        if p1 not in seen:
            if current - p1 == 1:
                visit[0] = -p1
            else:
                heapreplace(visit, -p1)
            see(p1)
        else:
            heappop(visit)
        if p2 not in seen:
            heappush(visit, -p2)
            see(p2)
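
One detail of the iterator above deserves spelling out: heapq only provides a
min-heap, so revisions are pushed negated to pop the largest one first. A
standalone illustration:

import heapq

visit = []
for r in (5, 12, 9):
    heapq.heappush(visit, -r)
# The smallest stored value corresponds to the largest revision, matching
# the 'current = -visit[0]' read in _lazyancestorsiter.
assert -visit[0] == 12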

class lazyancestors(object):
    def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
        """Create a new object generating ancestors for the given revs. Does
        not generate revs lower than stoprev.

        This is computed lazily starting from revs. The object supports
        iteration and membership.

        pfunc must return the parent revisions of a given revision and revs
        should be an iterable. inclusive is a boolean that indicates whether
        revs should be included. Revs lower than stoprev will not be
        generated.

        Result does not include the null revision."""
        self._parentrevs = pfunc
        self._initrevs = revs = [r for r in revs if r >= stoprev]
        self._stoprev = stoprev
        self._inclusive = inclusive

        self._containsseen = set()
        self._containsiter = _lazyancestorsiter(self._parentrevs,
                                                self._initrevs,
                                                self._stoprev,
                                                self._inclusive)

    def __nonzero__(self):
        """False if the set is empty, True otherwise."""
        try:
            next(iter(self))
            return True
        except StopIteration:
            return False

    __bool__ = __nonzero__

    def __iter__(self):
        """Generate the ancestors of _initrevs in reverse topological order.

        If inclusive is False, yield a sequence of revision numbers starting
        with the parents of each revision in revs, i.e., each revision is
        *not* considered an ancestor of itself. Results are emitted in reverse
        revision number order. That order is also topological: a child is
        always emitted before its parent.

        If inclusive is True, the source revisions are also yielded. The
        reverse revision number order is still enforced."""
        return _lazyancestorsiter(self._parentrevs, self._initrevs,
                                  self._stoprev, self._inclusive)

    def __contains__(self, target):
        """Test whether target is an ancestor of self._initrevs."""
        seen = self._containsseen
        if target in seen:
            return True
        iter = self._containsiter
        if iter is None:
            # Iterator exhausted
            return False
        # Only integer target is valid, but some callers expect 'None in self'
        # to be False. So we explicitly allow it.
        if target is None:
            return False

        see = seen.add
        try:
            while True:
                rev = next(iter)
                see(rev)
                if rev == target:
                    return True
                if rev < target:
                    return False
        except StopIteration:
            # Set to None to indicate fast-path can be used next time, and to
            # free up memory.
            self._containsiter = None
            return False
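
A minimal usage sketch, assuming a hypothetical parent function that returns
(p1, p2) pairs the way a changelog's parentrevs does:

parents = {0: (nullrev, nullrev), 1: (0, nullrev), 2: (1, nullrev),
           3: (1, nullrev), 4: (2, 3)}
la = lazyancestors(parents.__getitem__, [4])
assert list(la) == [3, 2, 1, 0]  # reverse topological order
assert 1 in la and 4 not in la   # inclusive=False excludes 4 itself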

class rustlazyancestors(lazyancestors):

    def __init__(self, index, revs, stoprev=0, inclusive=False):
        self._index = index
        self._stoprev = stoprev
        self._inclusive = inclusive
        # no need to prefilter out init revs that are smaller than stoprev,
        # it's done by the rustlazyancestors constructor.
        # we need to convert to a list, because our rustlazyancestors
        # constructor (from C code) doesn't understand anything else yet
        self._initrevs = initrevs = list(revs)

        self._containsseen = set()
        self._containsiter = parsers.rustlazyancestors(
            index, initrevs, stoprev, inclusive)

    def __iter__(self):
        return parsers.rustlazyancestors(self._index,
                                         self._initrevs,
                                         self._stoprev,
                                         self._inclusive)
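
How callers choose between the two classes is outside this excerpt. A
plausible call-site pattern, shown only as a sketch: probe the parsers module
for the Rust-backed entry point and fall back to pure Python.

# Hypothetical selection logic; parsers.rustlazyancestors only exists when
# the compiled parsers module provides it.
if getattr(parsers, 'rustlazyancestors', None) is not None:
    ancestorscls = rustlazyancestors  # wraps the compiled iterator
else:
    ancestorscls = lazyancestors      # pure Python fallback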

mercurial/revlog.py @@ -1,2556 +1,2560

# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor
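
To make the documented contract concrete, here is a hypothetical no-op
processor triple; MYFLAG is a placeholder that would have to be defined in
REVIDX_FLAGS_ORDER and REVIDX_KNOWN_FLAGS before registration could succeed.

def _noopread(rl, rawtext):
    return rawtext, True  # text; True: result usable for hash checking

def _noopwrite(rl, text):
    return text, True     # rawtext; True: hash the returned rawtext

def _noopraw(rl, rawtext):
    return True           # rawtext usable for hash checks as-is

# addflagprocessor(MYFLAG, (_noopread, _noopwrite, _noopraw))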

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
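
A quick check of the packing arithmetic (illustration, not part of the file):
the low 16 bits carry the flags, everything above them the byte offset.

packed = offset_type(1024, REVIDX_ISCENSORED)
assert getoffset(packed) == 1024
assert gettype(packed) == REVIDX_ISCENSORED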

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)
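
The subclass exists so that index[-1] always resolves to the null revision's
synthetic entry, even on an empty index. For example:

assert revlogoldindex([])[-1] == (0, 0, 0, -1, -1, -1, -1, nullid)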

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff
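
The two layouts can be sanity-checked against the byte counts listed in the
comments above:

import struct

assert struct.Struct(">4l20s20s20s").size == 76    # v0: 4*4 + 3*20 bytes
assert struct.Struct(">Qiiiiii20s12x").size == 64  # ng: 8 + 6*4 + 20 + 12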
287 class revlogio(object):
287 class revlogio(object):
288 def __init__(self):
288 def __init__(self):
289 self.size = indexformatng.size
289 self.size = indexformatng.size
290
290
291 def parseindex(self, data, inline):
291 def parseindex(self, data, inline):
292 # call the C implementation to parse the index data
292 # call the C implementation to parse the index data
293 index, cache = parsers.parse_index2(data, inline)
293 index, cache = parsers.parse_index2(data, inline)
294 return index, getattr(index, 'nodemap', None), cache
294 return index, getattr(index, 'nodemap', None), cache
295
295
296 def packentry(self, entry, node, version, rev):
296 def packentry(self, entry, node, version, rev):
297 p = indexformatng_pack(*entry)
297 p = indexformatng_pack(*entry)
298 if rev == 0:
298 if rev == 0:
299 p = versionformat_pack(version) + p[4:]
299 p = versionformat_pack(version) + p[4:]
300 return p
300 return p
301
301
302 class revlog(object):
302 class revlog(object):
303 """
303 """
304 the underlying revision storage object
304 the underlying revision storage object
305
305
306 A revlog consists of two parts, an index and the revision data.
306 A revlog consists of two parts, an index and the revision data.
307
307
308 The index is a file with a fixed record size containing
308 The index is a file with a fixed record size containing
309 information on each revision, including its nodeid (hash), the
309 information on each revision, including its nodeid (hash), the
310 nodeids of its parents, the position and offset of its data within
310 nodeids of its parents, the position and offset of its data within
311 the data file, and the revision it's based on. Finally, each entry
311 the data file, and the revision it's based on. Finally, each entry
312 contains a linkrev entry that can serve as a pointer to external
312 contains a linkrev entry that can serve as a pointer to external
313 data.
313 data.
314
314
315 The revision data itself is a linear collection of data chunks.
315 The revision data itself is a linear collection of data chunks.
316 Each chunk represents a revision and is usually represented as a
316 Each chunk represents a revision and is usually represented as a
317 delta against the previous chunk. To bound lookup time, runs of
317 delta against the previous chunk. To bound lookup time, runs of
318 deltas are limited to about 2 times the length of the original
318 deltas are limited to about 2 times the length of the original
319 version data. This makes retrieval of a version proportional to
319 version data. This makes retrieval of a version proportional to
320 its size, or O(1) relative to the number of revisions.
320 its size, or O(1) relative to the number of revisions.
321
321
322 Both pieces of the revlog are written to in an append-only
322 Both pieces of the revlog are written to in an append-only
323 fashion, which means we never need to rewrite a file to insert or
323 fashion, which means we never need to rewrite a file to insert or
324 remove data, and can use some simple techniques to avoid the need
324 remove data, and can use some simple techniques to avoid the need
325 for locking while reading.
325 for locking while reading.
326
326
327 If checkambig, indexfile is opened with checkambig=True at
327 If checkambig, indexfile is opened with checkambig=True at
328 writing, to avoid file stat ambiguity.
328 writing, to avoid file stat ambiguity.
329
329
330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
331 index will be mmapped rather than read if it is larger than the
331 index will be mmapped rather than read if it is larger than the
332 configured threshold.
332 configured threshold.
333
333
334 If censorable is True, the revlog can have censored revisions.
334 If censorable is True, the revlog can have censored revisions.
335 """
335 """
336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
337 mmaplargeindex=False, censorable=False):
337 mmaplargeindex=False, censorable=False):
338 """
338 """
339 create a revlog object
339 create a revlog object
340
340
341 opener is a function that abstracts the file opening operation
341 opener is a function that abstracts the file opening operation
342 and can be used to implement COW semantics or the like.
342 and can be used to implement COW semantics or the like.
343 """
343 """
344 self.indexfile = indexfile
344 self.indexfile = indexfile
345 self.datafile = datafile or (indexfile[:-2] + ".d")
345 self.datafile = datafile or (indexfile[:-2] + ".d")
346 self.opener = opener
346 self.opener = opener
347 # When True, indexfile is opened with checkambig=True at writing, to
347 # When True, indexfile is opened with checkambig=True at writing, to
348 # avoid file stat ambiguity.
348 # avoid file stat ambiguity.
349 self._checkambig = checkambig
349 self._checkambig = checkambig
350 self._censorable = censorable
350 self._censorable = censorable
351 # 3-tuple of (node, rev, text) for a raw revision.
351 # 3-tuple of (node, rev, text) for a raw revision.
352 self._revisioncache = None
352 self._revisioncache = None
353 # Maps rev to chain base rev.
353 # Maps rev to chain base rev.
354 self._chainbasecache = util.lrucachedict(100)
354 self._chainbasecache = util.lrucachedict(100)
355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
356 self._chunkcache = (0, '')
356 self._chunkcache = (0, '')
357 # How much data to read and cache into the raw revlog data cache.
357 # How much data to read and cache into the raw revlog data cache.
358 self._chunkcachesize = 65536
358 self._chunkcachesize = 65536
359 self._maxchainlen = None
359 self._maxchainlen = None
360 self._deltabothparents = True
360 self._deltabothparents = True
361 self.index = []
361 self.index = []
362 # Mapping of partial identifiers to full nodes.
362 # Mapping of partial identifiers to full nodes.
363 self._pcache = {}
363 self._pcache = {}
364 # Mapping of revision integer to full node.
364 # Mapping of revision integer to full node.
365 self._nodecache = {nullid: nullrev}
365 self._nodecache = {nullid: nullrev}
366 self._nodepos = None
366 self._nodepos = None
367 self._compengine = 'zlib'
367 self._compengine = 'zlib'
368 self._maxdeltachainspan = -1
368 self._maxdeltachainspan = -1
369 self._withsparseread = False
369 self._withsparseread = False
370 self._sparserevlog = False
370 self._sparserevlog = False
371 self._srdensitythreshold = 0.50
371 self._srdensitythreshold = 0.50
372 self._srmingapsize = 262144
372 self._srmingapsize = 262144
373
373
374 # Make copy of flag processors so each revlog instance can support
374 # Make copy of flag processors so each revlog instance can support
375 # custom flags.
375 # custom flags.
376 self._flagprocessors = dict(_flagprocessors)
376 self._flagprocessors = dict(_flagprocessors)
377
377
378 mmapindexthreshold = None
378 mmapindexthreshold = None
379 v = REVLOG_DEFAULT_VERSION
379 v = REVLOG_DEFAULT_VERSION
380 opts = getattr(opener, 'options', None)
380 opts = getattr(opener, 'options', None)
381 if opts is not None:
381 if opts is not None:
382 if 'revlogv2' in opts:
382 if 'revlogv2' in opts:
383 # version 2 revlogs always use generaldelta.
383 # version 2 revlogs always use generaldelta.
384 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
384 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
385 elif 'revlogv1' in opts:
385 elif 'revlogv1' in opts:
386 if 'generaldelta' in opts:
386 if 'generaldelta' in opts:
387 v |= FLAG_GENERALDELTA
387 v |= FLAG_GENERALDELTA
388 else:
388 else:
389 v = 0
389 v = 0
390 if 'chunkcachesize' in opts:
390 if 'chunkcachesize' in opts:
391 self._chunkcachesize = opts['chunkcachesize']
391 self._chunkcachesize = opts['chunkcachesize']
392 if 'maxchainlen' in opts:
392 if 'maxchainlen' in opts:
393 self._maxchainlen = opts['maxchainlen']
393 self._maxchainlen = opts['maxchainlen']
394 if 'deltabothparents' in opts:
394 if 'deltabothparents' in opts:
395 self._deltabothparents = opts['deltabothparents']
395 self._deltabothparents = opts['deltabothparents']
396 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
396 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
397 if 'compengine' in opts:
397 if 'compengine' in opts:
398 self._compengine = opts['compengine']
398 self._compengine = opts['compengine']
399 if 'maxdeltachainspan' in opts:
399 if 'maxdeltachainspan' in opts:
400 self._maxdeltachainspan = opts['maxdeltachainspan']
400 self._maxdeltachainspan = opts['maxdeltachainspan']
401 if mmaplargeindex and 'mmapindexthreshold' in opts:
401 if mmaplargeindex and 'mmapindexthreshold' in opts:
402 mmapindexthreshold = opts['mmapindexthreshold']
402 mmapindexthreshold = opts['mmapindexthreshold']
403 self._sparserevlog = bool(opts.get('sparse-revlog', False))
403 self._sparserevlog = bool(opts.get('sparse-revlog', False))
404 withsparseread = bool(opts.get('with-sparse-read', False))
404 withsparseread = bool(opts.get('with-sparse-read', False))
405 # sparse-revlog forces sparse-read
405 # sparse-revlog forces sparse-read
406 self._withsparseread = self._sparserevlog or withsparseread
406 self._withsparseread = self._sparserevlog or withsparseread
407 if 'sparse-read-density-threshold' in opts:
407 if 'sparse-read-density-threshold' in opts:
408 self._srdensitythreshold = opts['sparse-read-density-threshold']
408 self._srdensitythreshold = opts['sparse-read-density-threshold']
409 if 'sparse-read-min-gap-size' in opts:
409 if 'sparse-read-min-gap-size' in opts:
410 self._srmingapsize = opts['sparse-read-min-gap-size']
410 self._srmingapsize = opts['sparse-read-min-gap-size']
411 if opts.get('enableellipsis'):
411 if opts.get('enableellipsis'):
412 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
412 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
413
413
414 # revlog v0 doesn't have flag processors
414 # revlog v0 doesn't have flag processors
415 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
415 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
416 _insertflagprocessor(flag, processor, self._flagprocessors)
416 _insertflagprocessor(flag, processor, self._flagprocessors)
417
417
418 if self._chunkcachesize <= 0:
418 if self._chunkcachesize <= 0:
419 raise error.RevlogError(_('revlog chunk cache size %r is not '
419 raise error.RevlogError(_('revlog chunk cache size %r is not '
420 'greater than 0') % self._chunkcachesize)
420 'greater than 0') % self._chunkcachesize)
421 elif self._chunkcachesize & (self._chunkcachesize - 1):
421 elif self._chunkcachesize & (self._chunkcachesize - 1):
422 raise error.RevlogError(_('revlog chunk cache size %r is not a '
422 raise error.RevlogError(_('revlog chunk cache size %r is not a '
423 'power of 2') % self._chunkcachesize)
423 'power of 2') % self._chunkcachesize)
424
424
425 self._loadindex(v, mmapindexthreshold)
425 self._loadindex(v, mmapindexthreshold)
426
426
427 def _loadindex(self, v, mmapindexthreshold):
427 def _loadindex(self, v, mmapindexthreshold):
428 indexdata = ''
428 indexdata = ''
429 self._initempty = True
429 self._initempty = True
430 try:
430 try:
431 with self._indexfp() as f:
431 with self._indexfp() as f:
432 if (mmapindexthreshold is not None and
432 if (mmapindexthreshold is not None and
433 self.opener.fstat(f).st_size >= mmapindexthreshold):
433 self.opener.fstat(f).st_size >= mmapindexthreshold):
434 indexdata = util.buffer(util.mmapread(f))
434 indexdata = util.buffer(util.mmapread(f))
435 else:
435 else:
436 indexdata = f.read()
436 indexdata = f.read()
437 if len(indexdata) > 0:
437 if len(indexdata) > 0:
438 v = versionformat_unpack(indexdata[:4])[0]
438 v = versionformat_unpack(indexdata[:4])[0]
439 self._initempty = False
439 self._initempty = False
440 except IOError as inst:
440 except IOError as inst:
441 if inst.errno != errno.ENOENT:
441 if inst.errno != errno.ENOENT:
442 raise
442 raise
443
443
444 self.version = v
444 self.version = v
445 self._inline = v & FLAG_INLINE_DATA
445 self._inline = v & FLAG_INLINE_DATA
446 self._generaldelta = v & FLAG_GENERALDELTA
446 self._generaldelta = v & FLAG_GENERALDELTA
447 flags = v & ~0xFFFF
447 flags = v & ~0xFFFF
448 fmt = v & 0xFFFF
448 fmt = v & 0xFFFF
449 if fmt == REVLOGV0:
449 if fmt == REVLOGV0:
450 if flags:
450 if flags:
451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 'revlog %s') %
452 'revlog %s') %
453 (flags >> 16, fmt, self.indexfile))
453 (flags >> 16, fmt, self.indexfile))
454 elif fmt == REVLOGV1:
454 elif fmt == REVLOGV1:
455 if flags & ~REVLOGV1_FLAGS:
455 if flags & ~REVLOGV1_FLAGS:
456 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
456 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
457 'revlog %s') %
457 'revlog %s') %
458 (flags >> 16, fmt, self.indexfile))
458 (flags >> 16, fmt, self.indexfile))
459 elif fmt == REVLOGV2:
459 elif fmt == REVLOGV2:
460 if flags & ~REVLOGV2_FLAGS:
460 if flags & ~REVLOGV2_FLAGS:
461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
462 'revlog %s') %
462 'revlog %s') %
463 (flags >> 16, fmt, self.indexfile))
463 (flags >> 16, fmt, self.indexfile))
464 else:
464 else:
465 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
465 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
466 (fmt, self.indexfile))
466 (fmt, self.indexfile))
467
467
468 self._storedeltachains = True
468 self._storedeltachains = True
469
469
470 self._io = revlogio()
470 self._io = revlogio()
471 if self.version == REVLOGV0:
471 if self.version == REVLOGV0:
472 self._io = revlogoldio()
472 self._io = revlogoldio()
473 try:
473 try:
474 d = self._io.parseindex(indexdata, self._inline)
474 d = self._io.parseindex(indexdata, self._inline)
475 except (ValueError, IndexError):
475 except (ValueError, IndexError):
476 raise error.RevlogError(_("index %s is corrupted") %
476 raise error.RevlogError(_("index %s is corrupted") %
477 self.indexfile)
477 self.indexfile)
478 self.index, nodemap, self._chunkcache = d
478 self.index, nodemap, self._chunkcache = d
479 if nodemap is not None:
479 if nodemap is not None:
480 self.nodemap = self._nodecache = nodemap
480 self.nodemap = self._nodecache = nodemap
481 if not self._chunkcache:
481 if not self._chunkcache:
482 self._chunkclear()
482 self._chunkclear()
483 # revnum -> (chain-length, sum-delta-length)
483 # revnum -> (chain-length, sum-delta-length)
484 self._chaininfocache = {}
484 self._chaininfocache = {}
485 # revlog header -> revlog compressor
485 # revlog header -> revlog compressor
486 self._decompressors = {}
486 self._decompressors = {}
487
487
488 @util.propertycache
488 @util.propertycache
489 def _compressor(self):
489 def _compressor(self):
490 return util.compengines[self._compengine].revlogcompressor()
490 return util.compengines[self._compengine].revlogcompressor()
491
491
492 def _indexfp(self, mode='r'):
492 def _indexfp(self, mode='r'):
493 """file object for the revlog's index file"""
493 """file object for the revlog's index file"""
494 args = {r'mode': mode}
494 args = {r'mode': mode}
495 if mode != 'r':
495 if mode != 'r':
496 args[r'checkambig'] = self._checkambig
496 args[r'checkambig'] = self._checkambig
497 if mode == 'w':
497 if mode == 'w':
498 args[r'atomictemp'] = True
498 args[r'atomictemp'] = True
499 return self.opener(self.indexfile, **args)
499 return self.opener(self.indexfile, **args)
500
500
501 def _datafp(self, mode='r'):
501 def _datafp(self, mode='r'):
502 """file object for the revlog's data file"""
502 """file object for the revlog's data file"""
503 return self.opener(self.datafile, mode=mode)
503 return self.opener(self.datafile, mode=mode)
504
504
505 @contextlib.contextmanager
505 @contextlib.contextmanager
506 def _datareadfp(self, existingfp=None):
506 def _datareadfp(self, existingfp=None):
507 """file object suitable to read data"""
507 """file object suitable to read data"""
508 if existingfp is not None:
508 if existingfp is not None:
509 yield existingfp
509 yield existingfp
510 else:
510 else:
511 if self._inline:
511 if self._inline:
512 func = self._indexfp
512 func = self._indexfp
513 else:
513 else:
514 func = self._datafp
514 func = self._datafp
515 with func() as fp:
515 with func() as fp:
516 yield fp
516 yield fp
517
517
518 def tip(self):
518 def tip(self):
519 return self.node(len(self.index) - 1)
519 return self.node(len(self.index) - 1)
520 def __contains__(self, rev):
520 def __contains__(self, rev):
521 return 0 <= rev < len(self)
521 return 0 <= rev < len(self)
522 def __len__(self):
522 def __len__(self):
523 return len(self.index)
523 return len(self.index)
524 def __iter__(self):
524 def __iter__(self):
525 return iter(pycompat.xrange(len(self)))
525 return iter(pycompat.xrange(len(self)))
526 def revs(self, start=0, stop=None):
526 def revs(self, start=0, stop=None):
527 """iterate over all rev in this revlog (from start to stop)"""
527 """iterate over all rev in this revlog (from start to stop)"""
528 return storageutil.iterrevs(len(self), start=start, stop=stop)
528 return storageutil.iterrevs(len(self), start=start, stop=stop)
529
529
    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

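    # Added commentary (not original code): the pure Python fallback above
    # scans the index backwards from ``_nodepos``, memoizing every node it
    # passes, so repeated lookups of nearby nodes amortize to a single
    # walk. Illustrative doctest, assuming a revlog ``rl`` whose rev 5 has
    # node ``some_node`` and whose cache is cold:
    #
    #   >>> rl.rev(some_node)   # walks revs len(rl)-1 .. 5, caching each
    #   5
    #   >>> rl.rev(some_node)   # now a plain dict hit, no walk
    #   5
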
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

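    # Worked example of the packing above (illustrative, not original
    # code): with a first tuple entry of 0x12340001, the upper 48 bits
    # hold the byte offset and the low 16 bits hold the flags:
    #
    #   >>> entry0 = 0x12340001
    #   >>> entry0 >> 16       # start(): offset into the data file
    #   4660
    #   >>> entry0 & 0xFFFF    # flags(): per-revision flag bits
    #   1
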
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

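    # Illustrative check of the fast path above (not original code): the
    # mask ``REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS`` clears the ELLIPSIS bit
    # from the set of known flags, so a revision whose only flag is
    # ELLIPSIS still takes the rawsize shortcut. With hypothetical flag
    # values:
    #
    #   >>> KNOWN, ELLIPSIS = 0b0111, 0b0100
    #   >>> flags = 0b0100                     # only ELLIPSIS set
    #   >>> flags & (KNOWN ^ ELLIPSIS) == 0    # fast path taken
    #   True
    #   >>> flags = 0b0001                     # a content-changing flag
    #   >>> flags & (KNOWN ^ ELLIPSIS) == 0    # must materialize the text
    #   False
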
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

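    # Minimal standalone sketch of the pure Python walk above (toy data,
    # not original code): with general delta each entry's base field names
    # its delta parent; without it, the delta parent is always rev - 1.
    #
    #   >>> base = {0: 0, 1: 0, 2: 1, 3: 3, 4: 3}   # e[3] per rev (toy)
    #   >>> def chain(rev):
    #   ...     revs = []
    #   ...     while rev != base[rev]:
    #   ...         revs.append(rev)
    #   ...         rev = base[rev]                  # generaldelta step
    #   ...     revs.append(rev)
    #   ...     return revs[::-1]
    #   >>> chain(2)
    #   [0, 1, 2]
    #   >>> chain(4)   # rev 3 is a full snapshot (its own base)
    #   [3, 4]
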
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well
        if util.safehasattr(parsers, 'rustlazyancestors'):
            return ancestor.rustlazyancestors(
                self.index, revs,
                stoprev=stoprev, inclusive=inclusive)
        return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
                                      stoprev=stoprev, inclusive=inclusive)

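    # Added commentary on the dispatch above (not original code): the Rust
    # fast path is feature-detected at runtime, so pure-Python setups and
    # builds without the Rust extension fall through to the existing
    # implementation unchanged. In outline:
    #
    #   if util.safehasattr(parsers, 'rustlazyancestors'):
    #       ...  # Rust-backed iterator driven by the C index
    #   ...      # otherwise: the historical Python lazyancestors
    #
    # Keeping the signature identical (revs, stoprev, inclusive) means
    # callers cannot observe which implementation served them.
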
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

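    # Illustrative doctest for the local lazyset helper above (not
    # original code): it unions a lazily-evaluated ancestor iterator with
    # explicitly added revisions, and membership tests that hit the added
    # set never force the lazy side.
    #
    #   >>> s = lazyset(iter([1, 2, 3]))
    #   >>> s.add(7)
    #   >>> 7 in s        # answered from addedvalues alone
    #   True
    #   >>> sorted(s)     # iterating consumes the lazy values
    #   [1, 2, 3, 7]
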
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

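    # Worked example of the contract above (illustrative, not original
    # code): in a linear history n0 <- n1 <- n2 <- n3,
    #
    #   nodesbetween(roots=[n1], heads=[n2])
    #
    # returns ([n1, n2], [n1], [n2]): every node that is both a descendant
    # of a root and an ancestor of a head, plus the surviving roots and
    # the reachable heads; n0 and n3 fall outside the slice.
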
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

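    # Toy run of the marking pass above (illustrative, simplified: the
    # real code starts from 0 and marks revs as it iterates, so filtered
    # revs never become heads). For a DAG where rev 2 merges 0 and 1, and
    # rev 3 is a child of 2:
    #
    #   >>> parents = {0: (-1, -1), 1: (-1, -1), 2: (0, 1), 3: (2, -1)}
    #   >>> ishead = [1] * 4 + [0]           # extra slot absorbs nullrev
    #   >>> for r in range(4):
    #   ...     p1, p2 = parents[r]
    #   ...     ishead[p1] = ishead[p2] = 0  # my parents are not heads
    #   >>> [r for r, v in enumerate(ishead[:4]) if v]
    #   [3]
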
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return a in self._commonancestorsheads(a, b)

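    # Added note on the early-outs above (not original code): revision
    # numbers are assigned in topological order, so an ancestor always has
    # a strictly smaller number than its descendants; ``a > b`` can refuse
    # without touching the index. The final test relies on the fact that
    # if a is an ancestor of b, the common ancestors of a and b are
    # exactly the ancestors of a, whose unique head is a itself.
    #
    #   >>> # with a linear 0 <- 1 <- 2 history (hypothetical revlog rl):
    #   >>> rl.isancestorrev(0, 2)
    #   True
    #   >>> rl.isancestorrev(2, 0)   # 2 > 0: refused immediately
    #   False
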
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in prefix)

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

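    # Worked example of the window arithmetic above (illustrative, not
    # original code): with a 64 KB cache size, a 100-byte read at offset
    # 70000 is widened to the enclosing aligned window before hitting the
    # file.
    #
    #   >>> cachesize = 65536
    #   >>> offset, length = 70000, 100
    #   >>> realoffset = offset & ~(cachesize - 1)
    #   >>> realoffset
    #   65536
    #   >>> reallength = (((offset + length + cachesize)
    #   ...                & ~(cachesize - 1)) - realoffset)
    #   >>> reallength
    #   65536
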
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

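    # Illustrative arithmetic for the inline adjustment above (not
    # original code): in an inline revlog, index entries and data chunks
    # interleave in one file, so rev r's data is preceded by the r + 1
    # index entries of revs 0..r and its file position is its nominal
    # data offset plus ``(r + 1) * iosize``. With a hypothetical 64-byte
    # entry size:
    #
    #   >>> iosize = 64
    #   >>> nominal = 0              # start(0) for the first revision
    #   >>> nominal + (0 + 1) * iosize
    #   64
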
1415 def _chunk(self, rev, df=None):
1419 def _chunk(self, rev, df=None):
1416 """Obtain a single decompressed chunk for a revision.
1420 """Obtain a single decompressed chunk for a revision.
1417
1421
1418 Accepts an integer revision and an optional already-open file handle
1422 Accepts an integer revision and an optional already-open file handle
1419 to be used for reading. If used, the seek position of the file will not
1423 to be used for reading. If used, the seek position of the file will not
1420 be preserved.
1424 be preserved.
1421
1425
1422 Returns a str holding uncompressed data for the requested revision.
1426 Returns a str holding uncompressed data for the requested revision.
1423 """
1427 """
1424 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1428 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1425
1429
1426 def _chunks(self, revs, df=None, targetsize=None):
1430 def _chunks(self, revs, df=None, targetsize=None):
1427 """Obtain decompressed chunks for the specified revisions.
1431 """Obtain decompressed chunks for the specified revisions.
1428
1432
1429 Accepts an iterable of numeric revisions that are assumed to be in
1433 Accepts an iterable of numeric revisions that are assumed to be in
1430 ascending order. Also accepts an optional already-open file handle
1434 ascending order. Also accepts an optional already-open file handle
1431 to be used for reading. If used, the seek position of the file will
1435 to be used for reading. If used, the seek position of the file will
1432 not be preserved.
1436 not be preserved.
1433
1437
1434 This function is similar to calling ``self._chunk()`` multiple times,
1438 This function is similar to calling ``self._chunk()`` multiple times,
1435 but is faster.
1439 but is faster.
1436
1440
1437 Returns a list with decompressed data for each requested revision.
1441 Returns a list with decompressed data for each requested revision.
1438 """
1442 """
1439 if not revs:
1443 if not revs:
1440 return []
1444 return []
1441 start = self.start
1445 start = self.start
1442 length = self.length
1446 length = self.length
1443 inline = self._inline
1447 inline = self._inline
1444 iosize = self._io.size
1448 iosize = self._io.size
1445 buffer = util.buffer
1449 buffer = util.buffer
1446
1450
1447 l = []
1451 l = []
1448 ladd = l.append
1452 ladd = l.append
1449
1453
1450 if not self._withsparseread:
1454 if not self._withsparseread:
1451 slicedchunks = (revs,)
1455 slicedchunks = (revs,)
1452 else:
1456 else:
1453 slicedchunks = deltautil.slicechunk(self, revs,
1457 slicedchunks = deltautil.slicechunk(self, revs,
1454 targetsize=targetsize)
1458 targetsize=targetsize)
1455
1459
1456 for revschunk in slicedchunks:
1460 for revschunk in slicedchunks:
1457 firstrev = revschunk[0]
1461 firstrev = revschunk[0]
1458 # Skip trailing revisions with empty diff
1462 # Skip trailing revisions with empty diff
1459 for lastrev in revschunk[::-1]:
1463 for lastrev in revschunk[::-1]:
1460 if length(lastrev) != 0:
1464 if length(lastrev) != 0:
1461 break
1465 break
1462
1466
1463 try:
1467 try:
1464 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1468 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1465 except OverflowError:
1469 except OverflowError:
1466 # issue4215 - we can't cache a run of chunks greater than
1470 # issue4215 - we can't cache a run of chunks greater than
1467 # 2G on Windows
1471 # 2G on Windows
1468 return [self._chunk(rev, df=df) for rev in revschunk]
1472 return [self._chunk(rev, df=df) for rev in revschunk]
1469
1473
1470 decomp = self.decompress
1474 decomp = self.decompress
1471 for rev in revschunk:
1475 for rev in revschunk:
1472 chunkstart = start(rev)
1476 chunkstart = start(rev)
1473 if inline:
1477 if inline:
1474 chunkstart += (rev + 1) * iosize
1478 chunkstart += (rev + 1) * iosize
1475 chunklength = length(rev)
1479 chunklength = length(rev)
1476 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1480 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1477
1481
1478 return l
1482 return l
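
    # Illustrative sketch (not part of the original source): the batching
    # above replaces one read per revision with one read per slice. A naive
    # caller would effectively do the first line below, paying I/O overhead
    # for every revision; ``rl`` is an assumed name for a revlog instance.
    #
    #   chunks = [rl._chunk(rev) for rev in revs]   # one read per revision
    #   chunks = rl._chunks(revs)                   # few large reads, then
    #                                               # per-revision slicing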

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
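
    # Illustrative sketch (not part of the original source): walking a delta
    # chain back to its snapshot with deltaparent(); deltaparent() returns
    # nullrev once a full snapshot is reached, so the loop terminates there.
    # ``rl`` is an assumed name for a revlog instance.
    #
    #   chain = []
    #   r = rev
    #   while r != nullrev:
    #       chain.append(r)
    #       r = rl.deltaparent(r)
    #   chain.reverse()     # snapshot first, then deltas to apply in order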

    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if rev == nullrev:
            return True
        deltap = self.deltaparent(rev)
        if deltap == nullrev:
            return True
        p1, p2 = self.parentrevs(rev)
        if deltap in (p1, p2):
            return False
        return self.issnapshot(deltap)
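
    # Illustrative sketch (not part of the original source): with
    # intermediate snapshots, a revision deltaing against something that is
    # *not* one of its parents counts as a snapshot only if that base is
    # itself a snapshot, hence the recursion above. For example:
    #
    #   rev 0: full text (delta parent nullrev)         -> snapshot
    #   rev 5: delta against its parent rev 4           -> not a snapshot
    #   rev 9: delta against rev 0, which is not a
    #          parent of 9 but is itself a snapshot     -> snapshot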

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))
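
    # Illustrative sketch (not part of the original source): the fast path
    # above hands back the stored chunk verbatim when rev1 already is rev2's
    # delta base, so no text is reconstructed; only the fallback pays for two
    # full revision reconstructions plus a binary diff.
    #
    #   if rev1 != nullrev and rl.deltaparent(rev2) == rev1:
    #       delta = bytes(rl._chunk(rev2))          # reuse on-disk delta
    #   else:
    #       delta = mdiff.textdiff(rl.revision(rev1, raw=True),
    #                              rl.revision(rev2, raw=True))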

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
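
    # Illustrative sketch (not part of the original source): the
    # reconstruction pipeline above in miniature. A revision is its delta
    # chain's snapshot with the remaining deltas patched on top, followed by
    # flag processing; ``rl`` is an assumed name for a revlog instance.
    #
    #   chain, stopped = rl._deltachain(rev)        # snapshot comes first
    #   bins = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #   text, validate = rl._processflags(rawtext, rl.flags(rev), 'read')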

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
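
    # Illustrative sketch (not part of the original source): to the best of
    # my understanding, ``storageutil.hashrevisionsha1`` is equivalent to
    # sha1(min(p1, p2) + max(p1, p2) + text) - the parents are sorted so the
    # node hash is independent of parent order.
    #
    #   import hashlib
    #
    #   def hashrevisionsha1(text, p1, p2):
    #       a, b = (p1, p2) if p1 < p2 else (p2, p1)
    #       s = hashlib.sha1(a)
    #       s.update(b)
    #       s.update(text)
    #       return s.digest()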

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
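
    # Illustrative sketch (not part of the original source): the shape of a
    # flag processor as consumed above - a (read, write, raw) triple of
    # transforms. The base64 payload is hypothetical, chosen only to show the
    # expected signatures and return values.
    #
    #   import base64
    #
    #   def b64readtransform(rl, text):
    #       return base64.b64decode(text), True     # (newtext, validatehash)
    #
    #   def b64writetransform(rl, text):
    #       return base64.b64encode(text), False
    #
    #   def b64rawtransform(rl, text):
    #       return False            # raw text would fail the hash check
    #
    #   processor = (b64readtransform, b64writetransform, b64rawtransform)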

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
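
    # Illustrative sketch (not part of the original source): the two on-disk
    # layouts this conversion moves between. Inline revlogs interleave each
    # index entry with its data chunk in the .i file; split revlogs keep a
    # fixed-size-entry .i file and a separate .d file, which is why inline
    # offsets elsewhere are adjusted by (rev + 1) * self._io.size.
    #
    #   inline:  [entry 0][chunk 0][entry 1][chunk 1]...        foo.i
    #   split:   [entry 0][entry 1]...                          foo.i
    #            [chunk 0][chunk 1]...                          foo.d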

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)
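
    # Illustrative sketch (not part of the original source): the chunk header
    # convention that compress()/decompress() agree on. 'x' happens to be the
    # first byte of a zlib stream, '\0' marks raw data whose own first byte
    # is NUL, and 'u' prefixes raw data that needs the marker stripped.
    #
    #   import zlib
    #   chunk = zlib.compress(b'some revision text')
    #   assert chunk[0:1] == b'x'       # routed to _zlibdecompress above
    #   assert zlib.decompress(chunk) == b'some revision text'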

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
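
    # Illustrative sketch (not part of the original source): the index entry
    # tuple appended above packs the data-file offset and the revision flags
    # into its first field, which is why readers elsewhere in this file
    # unpack it with a shift and a mask.
    #
    #   e = self.index[rev]
    #   start = int(e[0] >> 16)     # byte offset of the chunk
    #   flags = e[0] & 0xffff       # low 16 bits hold the revision flags
    #   # e[1] = stored (compressed) length, e[2] = raw length,
    #   # e[3] = delta base, e[4] = linkrev, e[5]/e[6] = parent revs,
    #   # e[7] = node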

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
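
    # Illustrative sketch (not part of the original source): ``data`` here is
    # the 2-element payload produced by compress() above - a header piece and
    # a body piece - which is why both halves are written back to back.
    #
    #   header, body = rl.compress(deltadata)   # e.g. ('u', rawdata) or
    #                                           # ('', zlib-compressed bytes)
    #   # _writeentry writes header (when non-empty) followed by body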

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes
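
    # Illustrative sketch (not part of the original source): the shape each
    # item in ``deltas`` is expected to have, matching the unpacking above.
    # All names shown are placeholders.
    #
    #   deltas = [
    #       # (node, p1, p2, linknode, deltabase, delta, flags)
    #       (node1, parent1, parent2, linknode1, basenode, deltabytes, 0),
    #   ]
    #   rl.addgroup(deltas, linkmapper, transaction,
    #               addrevisioncb=lambda rl, node: None)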

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add
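
    # Illustrative sketch (not part of the original source): a bdiff-style
    # delta is a sequence of (start, end, length) big-endian int32 triples,
    # each followed by ``length`` bytes of replacement data - hence the
    # ">lll" header size checked above. A full-replacement delta is a single
    # hunk spanning the whole old text; to the best of my understanding that
    # is what mdiff.replacediffheader packs:
    #
    #   import struct
    #
    #   def replacediffheader(oldlen, newlen):
    #       return struct.pack(">lll", 0, oldlen, newlen)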

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False):
        if nodesorder not in ('nodes', 'storage', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            sendfulltext=not self._storedeltachains,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltaprevious=deltaprevious)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2247
2251
2248 def clone(self, tr, destrevlog, addrevisioncb=None,
2252 def clone(self, tr, destrevlog, addrevisioncb=None,
2249 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2253 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2250 """Copy this revlog to another, possibly with format changes.
2254 """Copy this revlog to another, possibly with format changes.
2251
2255
2252 The destination revlog will contain the same revisions and nodes.
2256 The destination revlog will contain the same revisions and nodes.
2253 However, it may not be bit-for-bit identical due to e.g. delta encoding
2257 However, it may not be bit-for-bit identical due to e.g. delta encoding
2254 differences.
2258 differences.
2255
2259
2256 The ``deltareuse`` argument control how deltas from the existing revlog
2260 The ``deltareuse`` argument control how deltas from the existing revlog
2257 are preserved in the destination revlog. The argument can have the
2261 are preserved in the destination revlog. The argument can have the
2258 following values:
2262 following values:
2259
2263
2260 DELTAREUSEALWAYS
2264 DELTAREUSEALWAYS
2261 Deltas will always be reused (if possible), even if the destination
2265 Deltas will always be reused (if possible), even if the destination
2262 revlog would not select the same revisions for the delta. This is the
2266 revlog would not select the same revisions for the delta. This is the
2263 fastest mode of operation.
2267 fastest mode of operation.
2264 DELTAREUSESAMEREVS
2268 DELTAREUSESAMEREVS
2265 Deltas will be reused if the destination revlog would pick the same
2269 Deltas will be reused if the destination revlog would pick the same
2266 revisions for the delta. This mode strikes a balance between speed
2270 revisions for the delta. This mode strikes a balance between speed
2267 and optimization.
2271 and optimization.
2268 DELTAREUSENEVER
2272 DELTAREUSENEVER
2269 Deltas will never be reused. This is the slowest mode of execution.
2273 Deltas will never be reused. This is the slowest mode of execution.
2270 This mode can be used to recompute deltas (e.g. if the diff/delta
2274 This mode can be used to recompute deltas (e.g. if the diff/delta
2271 algorithm changes).
2275 algorithm changes).
2272
2276
2273 Delta computation can be slow, so the choice of delta reuse policy can
2277 Delta computation can be slow, so the choice of delta reuse policy can
2274 significantly affect run time.
2278 significantly affect run time.
2275
2279
2276 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2280 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2277 two extremes. Deltas will be reused if they are appropriate. But if the
2281 two extremes. Deltas will be reused if they are appropriate. But if the
2278 delta could choose a better revision, it will do so. This means if you
2282 delta could choose a better revision, it will do so. This means if you
2279 are converting a non-generaldelta revlog to a generaldelta revlog,
2283 are converting a non-generaldelta revlog to a generaldelta revlog,
2280 deltas will be recomputed if the delta's parent isn't a parent of the
2284 deltas will be recomputed if the delta's parent isn't a parent of the
2281 revision.
2285 revision.
2282
2286
2283 In addition to the delta policy, the ``deltabothparents`` argument
2287 In addition to the delta policy, the ``deltabothparents`` argument
2284 controls whether to compute deltas against both parents for merges.
2288 controls whether to compute deltas against both parents for merges.
2285 By default, the current default is used.
2289 By default, the current default is used.
2286 """
2290 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex(self.version, None)

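    # Hypothetical sketch of driving the strategy above from repository level
    # (``repo`` and the file name are illustrative placeholders): replace one
    # revision's content with a tombstone inside a store transaction.
    #
    #   with repo.transaction(b'censor') as tr:
    #       fl = repo.file(b'secret.txt')
    #       fl.censorrevision(tr, fl.node(3), tombstone=b'removed')
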
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

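            # For instance (hypothetical values), a renamed file's rawtext
            # frames its metadata between b'\x01\n' markers:
            #
            #   b'\x01\ncopy: old.txt\ncopyrev: <40 hex digits>\n\x01\n<text>'
            #
            # while "text" is only b'<text>'; LM is then the length of
            # everything up to and including the closing b'\x01\n'.
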
            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

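    # A minimal driver sketch for the checks above; the ``state`` keys shown
    # are the ones this method reads, everything else a real verifier would
    # pass is omitted:
    #
    #   state = {'expectedversion': rl.version & 0xFFFF,
    #            'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       print(problem.error or problem.warning)
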
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
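
    # Example sketch (``rl`` is any loaded revlog instance): keys appear in
    # the result only when the corresponding argument is requested.
    #
    #   info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # e.g. {'revisionscount': 42, 'trackedsize': 12345}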