dagop: extract DAG local heads functionality from revlog...
Gregory Szorc - r40036:8af835af default
@@ -1,811 +1,847
# dagop.py - graph ancestry and topology algorithm for revset
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import heapq

from .node import (
    nullrev,
)
from .thirdparty import (
    attr,
)
from . import (
    error,
    mdiff,
    node,
    patch,
    pycompat,
    smartset,
)

baseset = smartset.baseset
generatorset = smartset.generatorset

# possible maximum depth between null and wdir()
_maxlogdepth = 0x80000000

def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse):
    """Walk DAG using 'pfunc' from the given 'revs' nodes

    'pfunc(rev)' should return the parent/child revisions of the given 'rev'
    if 'reverse' is True/False respectively.

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.
    """
    if startdepth is None:
        startdepth = 0
    if stopdepth is None:
        stopdepth = _maxlogdepth
    if stopdepth == 0:
        return
    if stopdepth < 0:
        raise error.ProgrammingError('negative stopdepth')
    if reverse:
        heapsign = -1  # max heap
    else:
        heapsign = +1  # min heap

    # load input revs lazily to heap so earlier revisions can be yielded
    # without fully computing the input revs
    revs.sort(reverse)
    irevs = iter(revs)
    pendingheap = []  # [(heapsign * rev, depth), ...] (i.e. lower depth first)

    inputrev = next(irevs, None)
    if inputrev is not None:
        heapq.heappush(pendingheap, (heapsign * inputrev, 0))

    lastrev = None
    while pendingheap:
        currev, curdepth = heapq.heappop(pendingheap)
        currev = heapsign * currev
        if currev == inputrev:
            inputrev = next(irevs, None)
            if inputrev is not None:
                heapq.heappush(pendingheap, (heapsign * inputrev, 0))
        # rescan parents until curdepth >= startdepth because queued entries
        # of the same revision are iterated from the lowest depth
        foundnew = (currev != lastrev)
        if foundnew and curdepth >= startdepth:
            lastrev = currev
            yield currev
        pdepth = curdepth + 1
        if foundnew and pdepth < stopdepth:
            for prev in pfunc(currev):
                if prev != node.nullrev:
                    heapq.heappush(pendingheap, (heapsign * prev, pdepth))

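# Editorial sketch (not part of this change): exercising _walkrevtree() above
# on a toy linear DAG 0 <- 1 <- 2 <- 3. _walkrevtree expects a smartset-like
# 'revs' whose sort() accepts a positional reverse flag, so a small list
# subclass stands in for one here; '_revlist' and '_example_walkrevtree' are
# illustrative names only.
class _revlist(list):
    def sort(self, reverse=False):
        list.sort(self, reverse=reverse)

def _example_walkrevtree():
    parents = {0: [-1], 1: [0], 2: [1], 3: [2]}
    # walk ancestors of rev 3, skipping depth 0 (rev 3 itself) and stopping
    # before depth 3 (rev 0): only revs 2 and 1 are yielded
    got = list(_walkrevtree(lambda rev: parents[rev], _revlist([3]),
                            startdepth=1, stopdepth=3, reverse=True))
    assert got == [2, 1]
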
def filectxancestors(fctxs, followfirst=False):
    """Like filectx.ancestors(), but can walk from multiple files/revisions,
    and includes the given fctxs themselves

    Yields (rev, {fctx, ...}) pairs in descending order.
    """
    visit = {}
    visitheap = []
    def addvisit(fctx):
        rev = fctx.rev()
        if rev not in visit:
            visit[rev] = set()
            heapq.heappush(visitheap, -rev)  # max heap
        visit[rev].add(fctx)

    if followfirst:
        cut = 1
    else:
        cut = None

    for c in fctxs:
        addvisit(c)
    while visit:
        currev = -heapq.heappop(visitheap)
        curfctxs = visit.pop(currev)
        yield currev, curfctxs
        for c in curfctxs:
            for parent in c.parents()[:cut]:
                addvisit(parent)
    assert not visitheap

def filerevancestors(fctxs, followfirst=False):
    """Like filectx.ancestors(), but can walk from multiple files/revisions,
    and includes the given fctxs themselves

    Returns a smartset.
    """
    gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst))
    return generatorset(gen, iterasc=False)

def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
    if followfirst:
        cut = 1
    else:
        cut = None
    cl = repo.changelog
    def plainpfunc(rev):
        try:
            return cl.parentrevs(rev)[:cut]
        except error.WdirUnsupported:
            return (pctx.rev() for pctx in repo[rev].parents()[:cut])
    if cutfunc is None:
        pfunc = plainpfunc
    else:
        pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
        revs = revs.filter(lambda rev: not cutfunc(rev))
    return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)

def revancestors(repo, revs, followfirst=False, startdepth=None,
                 stopdepth=None, cutfunc=None):
    """Like revlog.ancestors(), but supports additional options, includes
    the given revs themselves, and returns a smartset

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.

    If cutfunc is provided, it will be used to cut the traversal of the DAG.
    When cutfunc(X) returns True, the DAG traversal stops - revision X and
    X's ancestors in the traversal path will be skipped. This can sometimes
    be an optimization.

    Note: if Y is an ancestor of X, cutfunc(X) returning True does not
    necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
    return True in this case. For example,

        D     # revancestors(repo, D, cutfunc=lambda rev: rev == B)
        |\    # will include "A", because the path D -> C -> A was not cut.
        B C   # If "B" gets cut, "A" might want to be cut too.
        |/
        A
    """
    gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth,
                           cutfunc)
    return generatorset(gen, iterasc=False)

def _genrevdescendants(repo, revs, followfirst):
    if followfirst:
        cut = 1
    else:
        cut = None

    cl = repo.changelog
    first = revs.min()
    nullrev = node.nullrev
    if first == nullrev:
        # Are there nodes with a null first parent and a non-null
        # second one? Maybe. Do we care? Probably not.
        yield first
        for i in cl:
            yield i
    else:
        seen = set(revs)
        for i in cl.revs(first):
            if i in seen:
                yield i
                continue
            for x in cl.parentrevs(i)[:cut]:
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

def _builddescendantsmap(repo, startrev, followfirst):
    """Build map of 'rev -> child revs', offset from startrev"""
    cl = repo.changelog
    nullrev = node.nullrev
    descmap = [[] for _rev in pycompat.xrange(startrev, len(cl))]
    for currev in cl.revs(startrev + 1):
        p1rev, p2rev = cl.parentrevs(currev)
        if p1rev >= startrev:
            descmap[p1rev - startrev].append(currev)
        if not followfirst and p2rev != nullrev and p2rev >= startrev:
            descmap[p2rev - startrev].append(currev)
    return descmap

def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth):
    startrev = revs.min()
    descmap = _builddescendantsmap(repo, startrev, followfirst)
    def pfunc(rev):
        return descmap[rev - startrev]
    return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False)

def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None):
    """Like revlog.descendants() but supports additional options, includes
    the given revs themselves, and returns a smartset

    Scan ends at the stopdepth (exclusive) if specified. Revisions found
    earlier than the startdepth are omitted.
    """
    if startdepth is None and stopdepth is None:
        gen = _genrevdescendants(repo, revs, followfirst)
    else:
        gen = _genrevdescendantsofdepth(repo, revs, followfirst,
                                        startdepth, stopdepth)
    return generatorset(gen, iterasc=True)

def descendantrevs(revs, revsfn, parentrevsfn):
    """Generate revision number descendants in revision order.

    Yields revision numbers starting with a child of some rev in
    ``revs``. Results are ordered by revision number and are
    therefore topological. Each revision is not considered a descendant
    of itself.

    ``revsfn`` is a callable that with no arguments iterates over all
    revision numbers and with a ``start`` argument iterates over revision
    numbers beginning with that value.

    ``parentrevsfn`` is a callable that receives a revision number and
    returns an iterable of parent revision numbers, whose values may include
    nullrev.
    """
    first = min(revs)

    if first == nullrev:
        for rev in revsfn():
            yield rev
        return

    seen = set(revs)
    for rev in revsfn(start=first + 1):
        for prev in parentrevsfn(rev):
            if prev != nullrev and prev in seen:
                seen.add(rev)
                yield rev
                break

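# Editorial sketch (not part of this change): descendantrevs() above on a toy
# DAG 0 <- 1 <- 2 and 0 <- 3, with plain callables standing in for the
# storage hooks; '_example_descendantrevs' is an illustrative name only.
def _example_descendantrevs():
    parents = {0: [nullrev], 1: [0], 2: [1], 3: [0]}
    def revsfn(start=0):
        return iter(range(start, 4))
    # every rev except 0 itself descends from rev 0
    assert list(descendantrevs({0}, revsfn, parents.__getitem__)) == [1, 2, 3]
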
def _reachablerootspure(repo, minroot, roots, heads, includepath):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return []
    parentrevs = repo.changelog.parentrevs
    roots = set(roots)
    visit = list(heads)
    reachable = set()
    seen = {}
    # prefetch all the things! (because python is slow)
    reached = reachable.add
    dovisit = visit.append
    nextvisit = visit.pop
    # open-code the post-order traversal due to the tiny size of
    # sys.getrecursionlimit()
    while visit:
        rev = nextvisit()
        if rev in roots:
            reached(rev)
            if not includepath:
                continue
        parents = parentrevs(rev)
        seen[rev] = parents
        for parent in parents:
            if parent >= minroot and parent not in seen:
                dovisit(parent)
    if not reachable:
        return baseset()
    if not includepath:
        return reachable
    for rev in sorted(seen):
        for parent in seen[rev]:
            if parent in reachable:
                reached(rev)
    return reachable

def reachableroots(repo, roots, heads, includepath=False):
    """return (heads(::<roots> and ::<heads>))

    If includepath is True, return (<roots>::<heads>)."""
    if not roots:
        return baseset()
    minroot = roots.min()
    roots = list(roots)
    heads = list(heads)
    try:
        revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
    except AttributeError:
        revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
    revs = baseset(revs)
    revs.sort()
    return revs

def _changesrange(fctx1, fctx2, linerange2, diffopts):
    """Return `(diffinrange, linerange1)` where `diffinrange` is True
    if diff from fctx2 to fctx1 has changes in linerange2 and
    `linerange1` is the new line range for fctx1.
    """
    blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
    filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2)
    diffinrange = any(stype == '!' for _, stype in filteredblocks)
    return diffinrange, linerange1

def blockancestors(fctx, fromline, toline, followfirst=False):
    """Yield ancestors of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.
    """
    diffopts = patch.diffopts(fctx._repo.ui)
    fctx = fctx.introfilectx()
    visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
    while visit:
        c, linerange2 = visit.pop(max(visit))
        pl = c.parents()
        if followfirst:
            pl = pl[:1]
        if not pl:
            # The block originates from the initial revision.
            yield c, linerange2
            continue
        inrange = False
        for p in pl:
            inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
            inrange = inrange or inrangep
            if linerange1[0] == linerange1[1]:
                # Parent's linerange is empty, meaning that the block got
                # introduced in this revision; no need to go further in this
                # branch.
                continue
            # Set _descendantrev with 'c' (a known descendant) so that, when
            # _adjustlinkrev is called for 'p', it receives this descendant
            # (as srcrev) instead of the possibly-topmost introrev.
            p._descendantrev = c.rev()
            visit[p.linkrev(), p.filenode()] = p, linerange1
        if inrange:
            yield c, linerange2

def blockdescendants(fctx, fromline, toline):
    """Yield descendants of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.
    """
    # First possibly yield 'fctx' if it has changes in range with respect to
    # its parents.
    try:
        c, linerange1 = next(blockancestors(fctx, fromline, toline))
    except StopIteration:
        pass
    else:
        if c == fctx:
            yield c, linerange1

    diffopts = patch.diffopts(fctx._repo.ui)
    fl = fctx.filelog()
    seen = {fctx.filerev(): (fctx, (fromline, toline))}
    for i in fl.descendants([fctx.filerev()]):
        c = fctx.filectx(i)
        inrange = False
        for x in fl.parentrevs(i):
            try:
                p, linerange2 = seen[x]
            except KeyError:
                # nullrev or other branch
                continue
            inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
            inrange = inrange or inrangep
            # If revision 'i' has been seen (it's a merge) and the line range
            # previously computed differs from the one we just got, we take
            # the surrounding interval. This is conservative but avoids
            # losing information.
            if i in seen and seen[i][1] != linerange1:
                lbs, ubs = zip(linerange1, seen[i][1])
                linerange1 = min(lbs), max(ubs)
            seen[i] = c, linerange1
        if inrange:
            yield c, linerange1

@attr.s(slots=True, frozen=True)
class annotateline(object):
    fctx = attr.ib()
    lineno = attr.ib()
    # Whether this annotation was the result of a skip-annotate.
    skip = attr.ib(default=False)
    text = attr.ib(default=None)

@attr.s(slots=True, frozen=True)
class _annotatedfile(object):
    # list indexed by lineno - 1
    fctxs = attr.ib()
    linenos = attr.ib()
    skips = attr.ib()
    # full file content
    text = attr.ib()

def _countlines(text):
    if text.endswith("\n"):
        return text.count("\n")
    return text.count("\n") + int(bool(text))

def _decoratelines(text, fctx):
    n = _countlines(text)
    linenos = pycompat.rangelist(1, n + 1)
    return _annotatedfile([fctx] * n, linenos, [False] * n, text)

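# Editorial sketch (not part of this change): _countlines() above counts
# logical lines whether or not the text ends with a newline, and the empty
# string has zero lines; '_example_countlines' is an illustrative name only.
def _example_countlines():
    assert _countlines("") == 0
    assert _countlines("a\n") == 1
    assert _countlines("a\nb") == 2  # a trailing partial line still counts
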
def _annotatepair(parents, childfctx, child, skipchild, diffopts):
    r'''
    Given parent and child fctxes and annotate data for parents, for all lines
    in either parent that match the child, annotate the child with the parent's
    data.

    Additionally, if `skipchild` is True, replace all other lines with parent
    annotate data as well such that child is never blamed for any lines.

    See test-annotate.py for unit tests.
    '''
    pblocks = [(parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
               for parent in parents]

    if skipchild:
        # Need to iterate over the blocks twice -- make it a list
        pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
    # Mercurial currently prefers p2 over p1 for annotate.
    # TODO: change this?
    for parent, blocks in pblocks:
        for (a1, a2, b1, b2), t in blocks:
            # Changed blocks ('!') or blocks made only of blank lines ('~')
            # belong to the child.
            if t == '=':
                child.fctxs[b1:b2] = parent.fctxs[a1:a2]
                child.linenos[b1:b2] = parent.linenos[a1:a2]
                child.skips[b1:b2] = parent.skips[a1:a2]

    if skipchild:
        # Now try and match up anything that couldn't be matched.
        # Reversing pblocks maintains bias towards p2, matching the above
        # behavior.
        pblocks.reverse()

        # The heuristics are:
        # * Work on blocks of changed lines (effectively diff hunks with -U0).
        #   This could potentially be smarter but works well enough.
        # * For a non-matching section, do a best-effort fit. Match lines in
        #   diff hunks 1:1, dropping lines as necessary.
        # * Repeat the last line as a last resort.

        # First, replace as much as possible without repeating the last line.
        remaining = [(parent, []) for parent, _blocks in pblocks]
        for idx, (parent, blocks) in enumerate(pblocks):
            for (a1, a2, b1, b2), _t in blocks:
                if a2 - a1 >= b2 - b1:
                    for bk in pycompat.xrange(b1, b2):
                        if child.fctxs[bk] == childfctx:
                            ak = min(a1 + (bk - b1), a2 - 1)
                            child.fctxs[bk] = parent.fctxs[ak]
                            child.linenos[bk] = parent.linenos[ak]
                            child.skips[bk] = True
                else:
                    remaining[idx][1].append((a1, a2, b1, b2))

        # Then, look at anything left, which might involve repeating the last
        # line.
        for parent, blocks in remaining:
            for a1, a2, b1, b2 in blocks:
                for bk in pycompat.xrange(b1, b2):
                    if child.fctxs[bk] == childfctx:
                        ak = min(a1 + (bk - b1), a2 - 1)
                        child.fctxs[bk] = parent.fctxs[ak]
                        child.linenos[bk] = parent.linenos[ak]
                        child.skips[bk] = True
    return child

def annotate(base, parents, skiprevs=None, diffopts=None):
    """Core algorithm for filectx.annotate()

    `parents(fctx)` is a function returning a list of parent filectxs.
    """

    # This algorithm would prefer to be recursive, but Python is a
    # bit recursion-hostile. Instead we do an iterative
    # depth-first search.

    # 1st DFS pre-calculates pcache and needed
    visit = [base]
    pcache = {}
    needed = {base: 1}
    while visit:
        f = visit.pop()
        if f in pcache:
            continue
        pl = parents(f)
        pcache[f] = pl
        for p in pl:
            needed[p] = needed.get(p, 0) + 1
            if p not in pcache:
                visit.append(p)

    # 2nd DFS does the actual annotate
    visit[:] = [base]
    hist = {}
    while visit:
        f = visit[-1]
        if f in hist:
            visit.pop()
            continue

        ready = True
        pl = pcache[f]
        for p in pl:
            if p not in hist:
                ready = False
                visit.append(p)
        if ready:
            visit.pop()
            curr = _decoratelines(f.data(), f)
            skipchild = False
            if skiprevs is not None:
                skipchild = f._changeid in skiprevs
            curr = _annotatepair([hist[p] for p in pl], f, curr, skipchild,
                                 diffopts)
            for p in pl:
                if needed[p] == 1:
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

    a = hist[base]
    return [annotateline(*r) for r in zip(a.fctxs, a.linenos, a.skips,
                                          mdiff.splitnewlines(a.text))]

def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revisions already emitted
    # B) revisions in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we'll search for a subgroup in (B) awaiting REV to become
    #   available; if such a subgroup exists, we add REV to it and the
    #   subgroup is now awaiting REV.parents() to become available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revisions that have been emitted. They can be
    # considered unblocked as the graph generator is already aware of them so
    # there is no need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling
    # this set will force all other branches to wait until this branch is
    # ready to be emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of revs that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') corresponds to parents of any revision in
    # the group ('revs') that is not itself contained in the group. The main
    # idea of this algorithm is to delay as much as possible the emission of
    # any revision. This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this means we
    # properly retain new branches but give up on any special ordering for
    # ancestors of merges. The implementation can be improved to handle this
    # better.
    #
    # The first subgroup is special. It corresponds to all the revisions that
    # were already emitted. The 'revs' list is expected to be empty and the
    # 'blocked' set contains the parent revisions of already emitted
    # revisions.
    #
    # You could pre-seed the <parents> set of groups[0] to specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterate on pending revs until the current rev has been processed
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek a subgroup that is blocked, waiting for the current
            # revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are
                # blocked on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                #     revs [5, 4] waiting for 1
                #     revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines in their drawing. This is currently not
                # done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroup. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev: # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given)
            # or we emitted a whole disconnected set of the graph (reached a
            # root). In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1: # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going to
    # iterate over
    for g in groups:
        for r in g[0]:
            yield r

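# Editorial sketch (not part of this change): toposort() above reproduces the
# iteration order from its docstring on the DAG 0 <- 1 <- 4 and 0 <- 2 <- 3;
# '_example_toposort' is an illustrative name only.
def _example_toposort():
    parents = {0: [-1], 1: [0], 2: [0], 3: [2], 4: [1]}
    order = list(toposort([4, 3, 2, 1, 0], parents.__getitem__))
    assert order == [4, 1, 3, 2, 0]  # one branch at a time, heads first
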
def headrevs(revs, parentsfn):
    """Resolve the set of heads from a set of revisions.

    Receives an iterable of revision numbers and a callable that receives a
    revision number and returns an iterable of parent revision numbers,
    possibly including nullrev.

    Returns a set of revision numbers that are DAG heads within the passed
    subset.

    ``nullrev`` is never included in the returned set, even if it is provided
    in the input set.
    """
    headrevs = set(revs)

    for rev in revs:
        for prev in parentsfn(rev):
            headrevs.discard(prev)

    headrevs.discard(node.nullrev)

    return headrevs

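# Editorial sketch (not part of this change): headrevs() above on a toy DAG
# 0 <- 1 <- 2 and 0 <- 3, where revs 2 and 3 have no children inside the
# subset; '_example_headrevs' is an illustrative name only.
def _example_headrevs():
    parents = {0: [node.nullrev], 1: [0], 2: [1], 3: [0]}
    assert headrevs([0, 1, 2, 3], parents.__getitem__) == {2, 3}
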
def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None):
    """Returns the set of all revs that have no children, with control over
    where the DAG traversal starts and stops.

    ``revsfn`` is a callable that with no arguments returns an iterator over
    all revision numbers in topological order. With a ``start`` argument, it
    returns revision numbers starting at that number.

    ``parentrevsfn`` is a callable receiving a revision number and returning
    an iterable of parent revision numbers, where values can include nullrev.

    ``startrev`` is a revision number at which to start the search.

    ``stoprevs`` is an iterable of revision numbers that, when encountered,
    will stop DAG traversal beyond them. Parents of revisions in this
    collection will be heads.
    """
    if startrev is None:
        startrev = nullrev

    stoprevs = set(stoprevs or [])

    reachable = {startrev}
    heads = {startrev}

    for rev in revsfn(start=startrev + 1):
        for prev in parentrevsfn(rev):
            if prev in reachable:
                if rev not in stoprevs:
                    reachable.add(rev)
                heads.add(rev)

            if prev in heads and prev not in stoprevs:
                heads.remove(prev)

    return heads

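# Editorial sketch (not part of this change): headrevssubset() above on the
# same toy DAG as the previous example, showing how ``stoprevs`` turns a
# revision into a head and cuts off its descendants;
# '_example_headrevssubset' is an illustrative name only.
def _example_headrevssubset():
    parents = {0: [nullrev], 1: [0], 2: [1], 3: [0]}
    def revsfn(start=0):
        return iter(range(start, 4))
    assert headrevssubset(revsfn, parents.__getitem__) == {2, 3}
    # stopping at rev 1 hides its descendant rev 2 and makes rev 1 a head
    assert headrevssubset(revsfn, parents.__getitem__, stoprevs={1}) == {1, 3}
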
def linearize(revs, parentsfn):
    """Linearize and topologically sort a list of revisions.

    The linearization process tries to create long runs of revs where a child
    rev comes immediately after its first parent. This is done by visiting the
    heads of the revs in inverse topological order, and for each visited rev,
    visiting its second parent, then its first parent, then adding the rev
    itself to the output list.

    Returns a list of revision numbers.
    """
    visit = list(sorted(headrevs(revs, parentsfn), reverse=True))
    finished = set()
    result = []

    while visit:
        rev = visit.pop()
        if rev < 0:
            rev = -rev - 1

            if rev not in finished:
                result.append(rev)
                finished.add(rev)

        else:
            visit.append(-rev - 1)

            for prev in parentsfn(rev):
                if prev == node.nullrev or prev not in revs or prev in finished:
                    continue

                visit.append(prev)

    assert len(result) == len(revs)

    return result
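
# Editorial sketch (not part of this change): linearize() above emits parents
# before children, keeping runs of first-parent history together;
# '_example_linearize' is an illustrative name only.
def _example_linearize():
    parents = {0: [node.nullrev], 1: [0], 2: [1], 3: [0]}
    assert linearize({0, 1, 2, 3}, parents.__getitem__) == [0, 1, 2, 3]
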
@@ -1,2682 +1,2673
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# Blanket usage of all the names to prevent pyflakes "unused" warnings.
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariants:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and in REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, then the "rawtext" returned by "write" should be
      used to generate the hash. Usually, "write" and "read" return different
      booleans, and "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
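
# Illustrative sketch, not part of this changeset: what a conforming
# processor tuple could look like for a hypothetical flag that stores
# text base64-encoded on disk, with the node hash computed over the
# user-visible text.
import base64

def _demob64read(rl, rawtext):
    # on-disk rawtext -> user text; True: the returned text is the side
    # the hash can be checked against
    return base64.b64decode(rawtext), False or True

def _demob64write(rl, text):
    # user text -> on-disk rawtext; False: the hash is generated from
    # "text", not from the returned rawtext
    return base64.b64encode(text), False

def _demob64raw(rl, rawtext):
    # rawtext alone cannot be used for hash integrity checks
    return False

# REVIDX_B64ENCODED is hypothetical; registering it would raise unless
# the flag were actually defined in REVIDX_KNOWN_FLAGS and
# REVIDX_FLAGS_ORDER:
# addflagprocessor(REVIDX_B64ENCODED,
#                  (_demob64read, _demob64write, _demob64raw))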

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
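
# Worked example (illustrative, not part of this changeset): the first
# index field packs a 48-bit byte offset into the high bits and 16 bits
# of flags into the low bits; flags=0 is always a known value.
def _demooffsettype():
    q = offset_type(1048576, 0)
    assert getoffset(q) == 1048576
    assert gettype(q) == 0
    assert q == 1048576 << 16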

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
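
# Size sketch (illustrative, not part of this changeset): a v0 record
# is four 4-byte big-endian signed ints followed by three 20-byte
# binary nodeids, a fixed 76 bytes per entry.
def _demov0size():
    assert indexformatv0.size == 4 * 4 + 3 * 20 == 76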

class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)
264 # index ng:
264 # index ng:
265 # 6 bytes: offset
265 # 6 bytes: offset
266 # 2 bytes: flags
266 # 2 bytes: flags
267 # 4 bytes: compressed length
267 # 4 bytes: compressed length
268 # 4 bytes: uncompressed length
268 # 4 bytes: uncompressed length
269 # 4 bytes: base rev
269 # 4 bytes: base rev
270 # 4 bytes: link rev
270 # 4 bytes: link rev
271 # 4 bytes: parent 1 rev
271 # 4 bytes: parent 1 rev
272 # 4 bytes: parent 2 rev
272 # 4 bytes: parent 2 rev
273 # 32 bytes: nodeid
273 # 32 bytes: nodeid
274 indexformatng = struct.Struct(">Qiiiiii20s12x")
274 indexformatng = struct.Struct(">Qiiiiii20s12x")
275 indexformatng_pack = indexformatng.pack
275 indexformatng_pack = indexformatng.pack
276 versionformat = struct.Struct(">I")
276 versionformat = struct.Struct(">I")
277 versionformat_pack = versionformat.pack
277 versionformat_pack = versionformat.pack
278 versionformat_unpack = versionformat.unpack
278 versionformat_unpack = versionformat.unpack
279
279
280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
280 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
281 # signed integer)
281 # signed integer)
282 _maxentrysize = 0x7fffffff
282 _maxentrysize = 0x7fffffff
283
283
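# Layout sketch (illustrative, not part of this changeset):
# ">Qiiiiii20s12x" is 8 + 6 * 4 + 20 + 12 = 64 bytes. The 8-byte first
# field is the packed offset/flags value produced by offset_type()
# above, and "12x" pads the 20-byte nodeid out to the 32 bytes drawn in
# the comment.
def _demongsize():
    assert indexformatng.size == 64
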
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
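
    # Sketch of the power-of-two test applied to _chunkcachesize above
    # (illustrative, not part of this changeset): a positive n is a
    # power of two exactly when a single bit is set, i.e. when
    # n & (n - 1) == 0. For example 65536 & 65535 == 0, while
    # 65535 & 65534 != 0.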

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
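
    # Descriptive note (illustrative, not part of this changeset): the
    # pure-Python fallback above scans the index backwards from
    # _nodepos, memoizing every node it passes in _nodecache, so a
    # later miss resumes where the previous scan stopped instead of
    # rescanning the entire index from the tip.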

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
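
    # Worked example (illustrative, not part of this changeset): with
    # generaldelta, each entry names its delta base explicitly. If
    # index[5] has base 3, index[3] has base 0, and index[0] is its own
    # base (a full snapshot), then _deltachain(5) walks 5 -> 3 -> 0 and
    # returns ([0, 3, 5], False); reconstructing rev 5 applies the
    # deltas for 3 and 5 on top of the snapshot at 0.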

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
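
    # Worked example (illustrative, not part of this changeset): with a
    # linear history 0..4 and common=[node(2)], "has" lazily covers
    # nullrev plus revs 0, 1 and 2; the walk back from heads=[node(4)]
    # collects ancestors until it hits "has", so the missing set is
    # {3, 4}, returned sorted and therefore topologically ordered.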

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
880
880
881 def nodesbetween(self, roots=None, heads=None):
881 def nodesbetween(self, roots=None, heads=None):
882 """Return a topological path from 'roots' to 'heads'.
882 """Return a topological path from 'roots' to 'heads'.
883
883
884 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
884 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
885 topologically sorted list of all nodes N that satisfy both of
885 topologically sorted list of all nodes N that satisfy both of
886 these constraints:
886 these constraints:
887
887
888 1. N is a descendant of some node in 'roots'
888 1. N is a descendant of some node in 'roots'
889 2. N is an ancestor of some node in 'heads'
889 2. N is an ancestor of some node in 'heads'
890
890
891 Every node is considered to be both a descendant and an ancestor
891 Every node is considered to be both a descendant and an ancestor
892 of itself, so every reachable node in 'roots' and 'heads' will be
892 of itself, so every reachable node in 'roots' and 'heads' will be
893 included in 'nodes'.
893 included in 'nodes'.
894
894
895 'outroots' is the list of reachable nodes in 'roots', i.e., the
895 'outroots' is the list of reachable nodes in 'roots', i.e., the
896 subset of 'roots' that is returned in 'nodes'. Likewise,
896 subset of 'roots' that is returned in 'nodes'. Likewise,
897 'outheads' is the subset of 'heads' that is also in 'nodes'.
897 'outheads' is the subset of 'heads' that is also in 'nodes'.
898
898
899 'roots' and 'heads' are both lists of node IDs. If 'roots' is
899 'roots' and 'heads' are both lists of node IDs. If 'roots' is
900 unspecified, uses nullid as the only root. If 'heads' is
900 unspecified, uses nullid as the only root. If 'heads' is
901 unspecified, uses list of all of the revlog's heads."""
901 unspecified, uses list of all of the revlog's heads."""
902 nonodes = ([], [], [])
902 nonodes = ([], [], [])
903 if roots is not None:
903 if roots is not None:
904 roots = list(roots)
904 roots = list(roots)
905 if not roots:
905 if not roots:
906 return nonodes
906 return nonodes
907 lowestrev = min([self.rev(n) for n in roots])
907 lowestrev = min([self.rev(n) for n in roots])
908 else:
908 else:
909 roots = [nullid] # Everybody's a descendant of nullid
909 roots = [nullid] # Everybody's a descendant of nullid
910 lowestrev = nullrev
910 lowestrev = nullrev
911 if (lowestrev == nullrev) and (heads is None):
911 if (lowestrev == nullrev) and (heads is None):
912 # We want _all_ the nodes!
912 # We want _all_ the nodes!
913 return ([self.node(r) for r in self], [nullid], list(self.heads()))
913 return ([self.node(r) for r in self], [nullid], list(self.heads()))
914 if heads is None:
914 if heads is None:
915 # All nodes are ancestors, so the latest ancestor is the last
915 # All nodes are ancestors, so the latest ancestor is the last
916 # node.
916 # node.
917 highestrev = len(self) - 1
917 highestrev = len(self) - 1
918 # Set ancestors to None to signal that every node is an ancestor.
918 # Set ancestors to None to signal that every node is an ancestor.
919 ancestors = None
919 ancestors = None
920 # Set heads to an empty dictionary for later discovery of heads
920 # Set heads to an empty dictionary for later discovery of heads
921 heads = {}
921 heads = {}
922 else:
922 else:
923 heads = list(heads)
923 heads = list(heads)
924 if not heads:
924 if not heads:
925 return nonodes
925 return nonodes
926 ancestors = set()
926 ancestors = set()
927 # Turn heads into a dictionary so we can remove 'fake' heads.
927 # Turn heads into a dictionary so we can remove 'fake' heads.
928 # Also, later we will be using it to filter out the heads we can't
928 # Also, later we will be using it to filter out the heads we can't
929 # find from roots.
929 # find from roots.
930 heads = dict.fromkeys(heads, False)
930 heads = dict.fromkeys(heads, False)
931 # Start at the top and keep marking parents until we're done.
931 # Start at the top and keep marking parents until we're done.
932 nodestotag = set(heads)
932 nodestotag = set(heads)
933 # Remember where the top was so we can use it as a limit later.
933 # Remember where the top was so we can use it as a limit later.
934 highestrev = max([self.rev(n) for n in nodestotag])
934 highestrev = max([self.rev(n) for n in nodestotag])
935 while nodestotag:
935 while nodestotag:
936 # grab a node to tag
936 # grab a node to tag
937 n = nodestotag.pop()
937 n = nodestotag.pop()
938 # Never tag nullid
938 # Never tag nullid
939 if n == nullid:
939 if n == nullid:
940 continue
940 continue
941 # A node's revision number represents its place in a
941 # A node's revision number represents its place in a
942 # topologically sorted list of nodes.
942 # topologically sorted list of nodes.
943 r = self.rev(n)
943 r = self.rev(n)
944 if r >= lowestrev:
944 if r >= lowestrev:
945 if n not in ancestors:
945 if n not in ancestors:
946 # If we are possibly a descendant of one of the roots
946 # If we are possibly a descendant of one of the roots
947 # and we haven't already been marked as an ancestor
947 # and we haven't already been marked as an ancestor
948 ancestors.add(n) # Mark as ancestor
948 ancestors.add(n) # Mark as ancestor
949 # Add non-nullid parents to list of nodes to tag.
949 # Add non-nullid parents to list of nodes to tag.
950 nodestotag.update([p for p in self.parents(n) if
950 nodestotag.update([p for p in self.parents(n) if
951 p != nullid])
951 p != nullid])
952 elif n in heads: # We've seen it before, is it a fake head?
952 elif n in heads: # We've seen it before, is it a fake head?
953 # So it is; real heads should not be the ancestors of
953 # So it is; real heads should not be the ancestors of
954 # any other heads.
954 # any other heads.
955 heads.pop(n)
955 heads.pop(n)
956 if not ancestors:
956 if not ancestors:
957 return nonodes
957 return nonodes
958 # Now that we have our set of ancestors, we want to remove any
958 # Now that we have our set of ancestors, we want to remove any
959 # roots that are not ancestors.
959 # roots that are not ancestors.
960
960
961 # If one of the roots was nullid, everything is included anyway.
961 # If one of the roots was nullid, everything is included anyway.
962 if lowestrev > nullrev:
962 if lowestrev > nullrev:
963 # But, since it wasn't, let's recompute the lowest rev to not
963 # But, since it wasn't, let's recompute the lowest rev to not
964 # include roots that aren't ancestors.
964 # include roots that aren't ancestors.
965
965
966 # Filter out roots that aren't ancestors of heads
966 # Filter out roots that aren't ancestors of heads
967 roots = [root for root in roots if root in ancestors]
967 roots = [root for root in roots if root in ancestors]
968 # Recompute the lowest revision
968 # Recompute the lowest revision
969 if roots:
969 if roots:
970 lowestrev = min([self.rev(root) for root in roots])
970 lowestrev = min([self.rev(root) for root in roots])
971 else:
971 else:
972 # No more roots? Return empty list
972 # No more roots? Return empty list
973 return nonodes
973 return nonodes
974 else:
974 else:
975 # We are descending from nullid, and don't need to care about
975 # We are descending from nullid, and don't need to care about
976 # any other roots.
976 # any other roots.
977 lowestrev = nullrev
977 lowestrev = nullrev
978 roots = [nullid]
978 roots = [nullid]
979 # Transform our roots list into a set.
979 # Transform our roots list into a set.
980 descendants = set(roots)
980 descendants = set(roots)
981 # Also, keep the original roots so we can filter out roots that aren't
981 # Also, keep the original roots so we can filter out roots that aren't
982 # 'real' roots (i.e. are descended from other roots).
982 # 'real' roots (i.e. are descended from other roots).
983 roots = descendants.copy()
983 roots = descendants.copy()
984 # Our topologically sorted list of output nodes.
984 # Our topologically sorted list of output nodes.
985 orderedout = []
985 orderedout = []
986 # Don't start at nullid since we don't want nullid in our output list,
986 # Don't start at nullid since we don't want nullid in our output list,
987 # and if nullid shows up in descendants, empty parents will look like
987 # and if nullid shows up in descendants, empty parents will look like
988 # they're descendants.
988 # they're descendants.
989 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
989 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
990 n = self.node(r)
990 n = self.node(r)
991 isdescendant = False
991 isdescendant = False
992 if lowestrev == nullrev: # Everybody is a descendant of nullid
992 if lowestrev == nullrev: # Everybody is a descendant of nullid
993 isdescendant = True
993 isdescendant = True
994 elif n in descendants:
994 elif n in descendants:
995 # n is already a descendant
995 # n is already a descendant
996 isdescendant = True
996 isdescendant = True
997 # This check only needs to be done here because all the roots
997 # This check only needs to be done here because all the roots
998 # will start being marked as descendants before the loop.
998 # will start being marked as descendants before the loop.
999 if n in roots:
999 if n in roots:
1000 # If n was a root, check if it's a 'real' root.
1000 # If n was a root, check if it's a 'real' root.
1001 p = tuple(self.parents(n))
1001 p = tuple(self.parents(n))
1002 # If any of its parents are descendants, it's not a root.
1002 # If any of its parents are descendants, it's not a root.
1003 if (p[0] in descendants) or (p[1] in descendants):
1003 if (p[0] in descendants) or (p[1] in descendants):
1004 roots.remove(n)
1004 roots.remove(n)
1005 else:
1005 else:
1006 p = tuple(self.parents(n))
1006 p = tuple(self.parents(n))
1007 # A node is a descendant if either of its parents are
1007 # A node is a descendant if either of its parents are
1008 # descendants. (We seeded the descendants set with the roots
1008 # descendants. (We seeded the descendants set with the roots
1009 # up there, remember?)
1009 # up there, remember?)
1010 if (p[0] in descendants) or (p[1] in descendants):
1010 if (p[0] in descendants) or (p[1] in descendants):
1011 descendants.add(n)
1011 descendants.add(n)
1012 isdescendant = True
1012 isdescendant = True
1013 if isdescendant and ((ancestors is None) or (n in ancestors)):
1013 if isdescendant and ((ancestors is None) or (n in ancestors)):
1014 # Only include nodes that are both descendants and ancestors.
1014 # Only include nodes that are both descendants and ancestors.
1015 orderedout.append(n)
1015 orderedout.append(n)
1016 if (ancestors is not None) and (n in heads):
1016 if (ancestors is not None) and (n in heads):
1017 # We're trying to figure out which heads are reachable
1017 # We're trying to figure out which heads are reachable
1018 # from roots.
1018 # from roots.
1019 # Mark this head as having been reached
1019 # Mark this head as having been reached
1020 heads[n] = True
1020 heads[n] = True
1021 elif ancestors is None:
1021 elif ancestors is None:
1022 # Otherwise, we're trying to discover the heads.
1022 # Otherwise, we're trying to discover the heads.
1023 # Assume this is a head because if it isn't, the next step
1023 # Assume this is a head because if it isn't, the next step
1024 # will eventually remove it.
1024 # will eventually remove it.
1025 heads[n] = True
1025 heads[n] = True
1026 # But, obviously its parents aren't.
1026 # But, obviously its parents aren't.
1027 for p in self.parents(n):
1027 for p in self.parents(n):
1028 heads.pop(p, None)
1028 heads.pop(p, None)
1029 heads = [head for head, flag in heads.iteritems() if flag]
1029 heads = [head for head, flag in heads.iteritems() if flag]
1030 roots = list(roots)
1030 roots = list(roots)
1031 assert orderedout
1031 assert orderedout
1032 assert roots
1032 assert roots
1033 assert heads
1033 assert heads
1034 return (orderedout, roots, heads)
1034 return (orderedout, roots, heads)
1035
1035
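# A minimal sketch of the "fake head" filtering above, brute-forced on a toy
# DAG stored as a {node: parents} dict (illustrative names, not the revlog
# API): a head that is a strict ancestor of another head is not a real head.
def realheads(parents, heads):
    def strictancestors(n):
        seen, stack = set(), list(parents.get(n, ()))
        while stack:
            x = stack.pop()
            if x not in seen:
                seen.add(x)
                stack.extend(parents.get(x, ()))
        return seen
    fake = set()
    for h in heads:
        fake |= strictancestors(h)
    return [h for h in heads if h not in fake]

# 'b' is an ancestor of both 'c' and 'd', so it is filtered out:
toyparents = {'a': (), 'b': ('a',), 'c': ('b',), 'd': ('b',)}
assert sorted(realheads(toyparents, ['b', 'c', 'd'])) == ['c', 'd']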
1036 def headrevs(self):
1036 def headrevs(self):
1037 try:
1037 try:
1038 return self.index.headrevs()
1038 return self.index.headrevs()
1039 except AttributeError:
1039 except AttributeError:
1040 return self._headrevs()
1040 return self._headrevs()
1041
1041
1042 def computephases(self, roots):
1042 def computephases(self, roots):
1043 return self.index.computephasesmapsets(roots)
1043 return self.index.computephasesmapsets(roots)
1044
1044
1045 def _headrevs(self):
1045 def _headrevs(self):
1046 count = len(self)
1046 count = len(self)
1047 if not count:
1047 if not count:
1048 return [nullrev]
1048 return [nullrev]
1049 # we won't iterate over filtered revs, so nobody is a head at the start
1049 # we won't iterate over filtered revs, so nobody is a head at the start
1050 ishead = [0] * (count + 1)
1050 ishead = [0] * (count + 1)
1051 index = self.index
1051 index = self.index
1052 for r in self:
1052 for r in self:
1053 ishead[r] = 1 # I may be a head
1053 ishead[r] = 1 # I may be a head
1054 e = index[r]
1054 e = index[r]
1055 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1055 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1056 return [r for r, val in enumerate(ishead) if val]
1056 return [r for r, val in enumerate(ishead) if val]
1057
1057
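# The marking pass above, standalone: every rev starts out a candidate head
# and each rev clears its parents. The real code allocates one extra slot so
# that writes at nullrev (-1) land harmlessly in the spare last cell. A toy
# parentrevs over revs numbered so that parents precede children:
def toyheadrevs(nrevs, parentrevs):
    ishead = [1] * (nrevs + 1)        # spare slot absorbs index -1
    for r in range(nrevs):
        for p in parentrevs(r):
            ishead[p] = 0             # my parents are not heads
    return [r for r in range(nrevs) if ishead[r]]

# linear history 0 <- 1 <- 2 with an extra branch 1 <- 3:
toyparents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
assert toyheadrevs(4, toyparents.__getitem__) == [2, 3]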
1058 def heads(self, start=None, stop=None):
1058 def heads(self, start=None, stop=None):
1059 """return the list of all nodes that have no children
1059 """return the list of all nodes that have no children
1060
1060
1061 if start is specified, only heads that are descendants of
1061 if start is specified, only heads that are descendants of
1062 start will be returned
1062 start will be returned
1063 if stop is specified, it will consider all the revs from stop
1063 if stop is specified, it will consider all the revs from stop
1064 as if they had no children
1064 as if they had no children
1065 """
1065 """
1066 if start is None and stop is None:
1066 if start is None and stop is None:
1067 if not len(self):
1067 if not len(self):
1068 return [nullid]
1068 return [nullid]
1069 return [self.node(r) for r in self.headrevs()]
1069 return [self.node(r) for r in self.headrevs()]
1070
1070
1071 if start is None:
1072 start = nullid
1073 if stop is None:
1074 stop = []
1075 stoprevs = set([self.rev(n) for n in stop])
1076 startrev = self.rev(start)
1077 reachable = {startrev}
1078 heads = {startrev}
1079
1080 parentrevs = self.parentrevs
1081 for r in self.revs(start=startrev + 1):
1082 for p in parentrevs(r):
1083 if p in reachable:
1084 if r not in stoprevs:
1085 reachable.add(r)
1086 heads.add(r)
1087 if p in heads and p not in stoprevs:
1088 heads.remove(p)
1089
1090 return [self.node(r) for r in heads]
1091
1071 if start is None:
1072 start = nullrev
1073 else:
1074 start = self.rev(start)
1075
1076 stoprevs = set(self.rev(n) for n in stop or [])
1077
1078 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1079 stoprevs=stoprevs)
1080
1081 return [self.node(rev) for rev in revs]
1082
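# The removed implementation above (first block) now lives in
# dagop.headrevssubset; a self-contained sketch of that bookkeeping, with toy
# revsfn/parentrevsfn stand-ins (not the real dagop code): a rev reachable
# from startrev joins the head set and evicts any parent it reaches.
def headrevssubset(revsfn, parentrevsfn, startrev, stoprevs):
    reachable = {startrev}
    heads = {startrev}
    for r in revsfn(start=startrev + 1):
        for p in parentrevsfn(r):
            if p in reachable:
                if r not in stoprevs:
                    reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)
    return heads

# toy index: 0 <- 1 <- 2, plus a branch 0 <- 3
toyparents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}
revsfn = lambda start: range(start, len(toyparents))
assert headrevssubset(revsfn, toyparents.__getitem__, 0, set()) == {2, 3}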
1092 def children(self, node):
1083 def children(self, node):
1093 """find the children of a given node"""
1084 """find the children of a given node"""
1094 c = []
1085 c = []
1095 p = self.rev(node)
1086 p = self.rev(node)
1096 for r in self.revs(start=p + 1):
1087 for r in self.revs(start=p + 1):
1097 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1088 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1098 if prevs:
1089 if prevs:
1099 for pr in prevs:
1090 for pr in prevs:
1100 if pr == p:
1091 if pr == p:
1101 c.append(self.node(r))
1092 c.append(self.node(r))
1102 elif p == nullrev:
1093 elif p == nullrev:
1103 c.append(self.node(r))
1094 c.append(self.node(r))
1104 return c
1095 return c
1105
1096
1106 def commonancestorsheads(self, a, b):
1097 def commonancestorsheads(self, a, b):
1107 """calculate all the heads of the common ancestors of nodes a and b"""
1098 """calculate all the heads of the common ancestors of nodes a and b"""
1108 a, b = self.rev(a), self.rev(b)
1099 a, b = self.rev(a), self.rev(b)
1109 ancs = self._commonancestorsheads(a, b)
1100 ancs = self._commonancestorsheads(a, b)
1110 return pycompat.maplist(self.node, ancs)
1101 return pycompat.maplist(self.node, ancs)
1111
1102
1112 def _commonancestorsheads(self, *revs):
1103 def _commonancestorsheads(self, *revs):
1113 """calculate all the heads of the common ancestors of revs"""
1104 """calculate all the heads of the common ancestors of revs"""
1114 try:
1105 try:
1115 ancs = self.index.commonancestorsheads(*revs)
1106 ancs = self.index.commonancestorsheads(*revs)
1116 except (AttributeError, OverflowError): # C implementation failed
1107 except (AttributeError, OverflowError): # C implementation failed
1117 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1108 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1118 return ancs
1109 return ancs
1119
1110
1120 def isancestor(self, a, b):
1111 def isancestor(self, a, b):
1121 """return True if node a is an ancestor of node b
1112 """return True if node a is an ancestor of node b
1122
1113
1123 A revision is considered an ancestor of itself."""
1114 A revision is considered an ancestor of itself."""
1124 a, b = self.rev(a), self.rev(b)
1115 a, b = self.rev(a), self.rev(b)
1125 return self.isancestorrev(a, b)
1116 return self.isancestorrev(a, b)
1126
1117
1127 def isancestorrev(self, a, b):
1118 def isancestorrev(self, a, b):
1128 """return True if revision a is an ancestor of revision b
1119 """return True if revision a is an ancestor of revision b
1129
1120
1130 A revision is considered an ancestor of itself.
1121 A revision is considered an ancestor of itself.
1131
1122
1132 The implementation of this is trivial but the use of
1123 The implementation of this is trivial but the use of
1133 commonancestorsheads is not."""
1124 commonancestorsheads is not."""
1134 if a == nullrev:
1125 if a == nullrev:
1135 return True
1126 return True
1136 elif a == b:
1127 elif a == b:
1137 return True
1128 return True
1138 elif a > b:
1129 elif a > b:
1139 return False
1130 return False
1140 return a in self._commonancestorsheads(a, b)
1131 return a in self._commonancestorsheads(a, b)
1141
1132
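# Why `a in commonancestorsheads(a, b)` answers the question: if a is an
# ancestor of b, the common ancestors of a and b are exactly the ancestors
# of a, and that set has a single head, a itself. A brute-force check on a
# toy DAG (illustrative helpers, not the revlog/ancestor modules):
def ancestorsof(parents, r):          # r plus all of its ancestors
    seen, stack = {r}, [r]
    while stack:
        for p in parents[stack.pop()]:
            if p not in seen:
                seen.add(p)
                stack.append(p)
    return seen

def toycommonancestorsheads(parents, a, b):
    common = ancestorsof(parents, a) & ancestorsof(parents, b)
    # heads of the common set: members with no child inside the set
    return {r for r in common
            if not any(c in common and r in parents[c] for c in parents)}

toyparents = {0: (), 1: (0,), 2: (1,), 3: (1,)}
assert 1 in toycommonancestorsheads(toyparents, 1, 3)      # 1 is an ancestor of 3
assert 2 not in toycommonancestorsheads(toyparents, 2, 3)  # 2 is not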
1142 def ancestor(self, a, b):
1133 def ancestor(self, a, b):
1143 """calculate the "best" common ancestor of nodes a and b"""
1134 """calculate the "best" common ancestor of nodes a and b"""
1144
1135
1145 a, b = self.rev(a), self.rev(b)
1136 a, b = self.rev(a), self.rev(b)
1146 try:
1137 try:
1147 ancs = self.index.ancestors(a, b)
1138 ancs = self.index.ancestors(a, b)
1148 except (AttributeError, OverflowError):
1139 except (AttributeError, OverflowError):
1149 ancs = ancestor.ancestors(self.parentrevs, a, b)
1140 ancs = ancestor.ancestors(self.parentrevs, a, b)
1150 if ancs:
1141 if ancs:
1151 # choose a consistent winner when there's a tie
1142 # choose a consistent winner when there's a tie
1152 return min(map(self.node, ancs))
1143 return min(map(self.node, ancs))
1153 return nullid
1144 return nullid
1154
1145
1155 def _match(self, id):
1146 def _match(self, id):
1156 if isinstance(id, int):
1147 if isinstance(id, int):
1157 # rev
1148 # rev
1158 return self.node(id)
1149 return self.node(id)
1159 if len(id) == 20:
1150 if len(id) == 20:
1160 # possibly a binary node
1151 # possibly a binary node
1161 # odds of a binary node being all hex in ASCII are 1 in 10**25
1152 # odds of a binary node being all hex in ASCII are 1 in 10**25
1162 try:
1153 try:
1163 node = id
1154 node = id
1164 self.rev(node) # quick search the index
1155 self.rev(node) # quick search the index
1165 return node
1156 return node
1166 except error.LookupError:
1157 except error.LookupError:
1167 pass # may be partial hex id
1158 pass # may be partial hex id
1168 try:
1159 try:
1169 # str(rev)
1160 # str(rev)
1170 rev = int(id)
1161 rev = int(id)
1171 if "%d" % rev != id:
1162 if "%d" % rev != id:
1172 raise ValueError
1163 raise ValueError
1173 if rev < 0:
1164 if rev < 0:
1174 rev = len(self) + rev
1165 rev = len(self) + rev
1175 if rev < 0 or rev >= len(self):
1166 if rev < 0 or rev >= len(self):
1176 raise ValueError
1167 raise ValueError
1177 return self.node(rev)
1168 return self.node(rev)
1178 except (ValueError, OverflowError):
1169 except (ValueError, OverflowError):
1179 pass
1170 pass
1180 if len(id) == 40:
1171 if len(id) == 40:
1181 try:
1172 try:
1182 # a full hex nodeid?
1173 # a full hex nodeid?
1183 node = bin(id)
1174 node = bin(id)
1184 self.rev(node)
1175 self.rev(node)
1185 return node
1176 return node
1186 except (TypeError, error.LookupError):
1177 except (TypeError, error.LookupError):
1187 pass
1178 pass
1188
1179
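# The lookup order above, restated on a toy index: an int is a rev; a
# 20-byte value is tried as a binary nodeid; a decimal string as str(rev)
# (negative counts back from the tip); a 40-char string as a full hex
# nodeid. Illustrative helper over a plain list of ids, not the real _match:
from binascii import unhexlify

def toymatch(nodes, id):
    if isinstance(id, int):
        return nodes[id]                       # a rev number
    if len(id) == 20 and id in nodes:
        return id                              # a binary nodeid
    try:
        rev = int(id)                          # str(rev)?
        if '%d' % rev == id and -len(nodes) <= rev < len(nodes):
            return nodes[rev]
    except ValueError:
        pass
    if len(id) == 40:
        try:
            n = unhexlify(id)                  # full hex nodeid?
            if n in nodes:
                return n
        except (TypeError, ValueError):
            pass
    return None

import os
nodes = [os.urandom(20) for _ in range(3)]
assert toymatch(nodes, 1) == nodes[1]
assert toymatch(nodes, '-1') == nodes[-1]
assert toymatch(nodes, nodes[2].hex()) == nodes[2]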
1189 def _partialmatch(self, id):
1180 def _partialmatch(self, id):
1190 # we don't care about wdirfilenodeids as they should always be full hashes
1181 # we don't care about wdirfilenodeids as they should always be full hashes
1191 maybewdir = wdirhex.startswith(id)
1182 maybewdir = wdirhex.startswith(id)
1192 try:
1183 try:
1193 partial = self.index.partialmatch(id)
1184 partial = self.index.partialmatch(id)
1194 if partial and self.hasnode(partial):
1185 if partial and self.hasnode(partial):
1195 if maybewdir:
1186 if maybewdir:
1196 # single 'ff...' match in radix tree, ambiguous with wdir
1187 # single 'ff...' match in radix tree, ambiguous with wdir
1197 raise error.RevlogError
1188 raise error.RevlogError
1198 return partial
1189 return partial
1199 if maybewdir:
1190 if maybewdir:
1200 # no 'ff...' match in radix tree, wdir identified
1191 # no 'ff...' match in radix tree, wdir identified
1201 raise error.WdirUnsupported
1192 raise error.WdirUnsupported
1202 return None
1193 return None
1203 except error.RevlogError:
1194 except error.RevlogError:
1204 # parsers.c radix tree lookup gave multiple matches
1195 # parsers.c radix tree lookup gave multiple matches
1205 # fast path: for unfiltered changelog, radix tree is accurate
1196 # fast path: for unfiltered changelog, radix tree is accurate
1206 if not getattr(self, 'filteredrevs', None):
1197 if not getattr(self, 'filteredrevs', None):
1207 raise error.AmbiguousPrefixLookupError(
1198 raise error.AmbiguousPrefixLookupError(
1208 id, self.indexfile, _('ambiguous identifier'))
1199 id, self.indexfile, _('ambiguous identifier'))
1209 # fall through to slow path that filters hidden revisions
1200 # fall through to slow path that filters hidden revisions
1210 except (AttributeError, ValueError):
1201 except (AttributeError, ValueError):
1211 # we are pure python, or key was too short to search radix tree
1202 # we are pure python, or key was too short to search radix tree
1212 pass
1203 pass
1213
1204
1214 if id in self._pcache:
1205 if id in self._pcache:
1215 return self._pcache[id]
1206 return self._pcache[id]
1216
1207
1217 if len(id) <= 40:
1208 if len(id) <= 40:
1218 try:
1209 try:
1219 # hex(node)[:...]
1210 # hex(node)[:...]
1220 l = len(id) // 2 # grab an even number of digits
1211 l = len(id) // 2 # grab an even number of digits
1221 prefix = bin(id[:l * 2])
1212 prefix = bin(id[:l * 2])
1222 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1213 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1223 nl = [n for n in nl if hex(n).startswith(id) and
1214 nl = [n for n in nl if hex(n).startswith(id) and
1224 self.hasnode(n)]
1215 self.hasnode(n)]
1225 if nullhex.startswith(id):
1216 if nullhex.startswith(id):
1226 nl.append(nullid)
1217 nl.append(nullid)
1227 if len(nl) > 0:
1218 if len(nl) > 0:
1228 if len(nl) == 1 and not maybewdir:
1219 if len(nl) == 1 and not maybewdir:
1229 self._pcache[id] = nl[0]
1220 self._pcache[id] = nl[0]
1230 return nl[0]
1221 return nl[0]
1231 raise error.AmbiguousPrefixLookupError(
1222 raise error.AmbiguousPrefixLookupError(
1232 id, self.indexfile, _('ambiguous identifier'))
1223 id, self.indexfile, _('ambiguous identifier'))
1233 if maybewdir:
1224 if maybewdir:
1234 raise error.WdirUnsupported
1225 raise error.WdirUnsupported
1235 return None
1226 return None
1236 except TypeError:
1227 except TypeError:
1237 pass
1228 pass
1238
1229
1239 def lookup(self, id):
1230 def lookup(self, id):
1240 """locate a node based on:
1231 """locate a node based on:
1241 - revision number or str(revision number)
1232 - revision number or str(revision number)
1242 - nodeid or subset of hex nodeid
1233 - nodeid or subset of hex nodeid
1243 """
1234 """
1244 n = self._match(id)
1235 n = self._match(id)
1245 if n is not None:
1236 if n is not None:
1246 return n
1237 return n
1247 n = self._partialmatch(id)
1238 n = self._partialmatch(id)
1248 if n:
1239 if n:
1249 return n
1240 return n
1250
1241
1251 raise error.LookupError(id, self.indexfile, _('no match found'))
1242 raise error.LookupError(id, self.indexfile, _('no match found'))
1252
1243
1253 def shortest(self, node, minlength=1):
1244 def shortest(self, node, minlength=1):
1254 """Find the shortest unambiguous prefix that matches node."""
1245 """Find the shortest unambiguous prefix that matches node."""
1255 def isvalid(prefix):
1246 def isvalid(prefix):
1256 try:
1247 try:
1257 node = self._partialmatch(prefix)
1248 node = self._partialmatch(prefix)
1258 except error.AmbiguousPrefixLookupError:
1249 except error.AmbiguousPrefixLookupError:
1259 return False
1250 return False
1260 except error.WdirUnsupported:
1251 except error.WdirUnsupported:
1261 # single 'ff...' match
1252 # single 'ff...' match
1262 return True
1253 return True
1263 if node is None:
1254 if node is None:
1264 raise error.LookupError(node, self.indexfile, _('no node'))
1255 raise error.LookupError(node, self.indexfile, _('no node'))
1265 return True
1256 return True
1266
1257
1267 def maybewdir(prefix):
1258 def maybewdir(prefix):
1268 return all(c == 'f' for c in prefix)
1259 return all(c == 'f' for c in prefix)
1269
1260
1270 hexnode = hex(node)
1261 hexnode = hex(node)
1271
1262
1272 def disambiguate(hexnode, minlength):
1263 def disambiguate(hexnode, minlength):
1273 """Disambiguate against wdirid."""
1264 """Disambiguate against wdirid."""
1274 for length in range(minlength, 41):
1265 for length in range(minlength, 41):
1275 prefix = hexnode[:length]
1266 prefix = hexnode[:length]
1276 if not maybewdir(prefix):
1267 if not maybewdir(prefix):
1277 return prefix
1268 return prefix
1278
1269
1279 if not getattr(self, 'filteredrevs', None):
1270 if not getattr(self, 'filteredrevs', None):
1280 try:
1271 try:
1281 length = max(self.index.shortest(node), minlength)
1272 length = max(self.index.shortest(node), minlength)
1282 return disambiguate(hexnode, length)
1273 return disambiguate(hexnode, length)
1283 except error.RevlogError:
1274 except error.RevlogError:
1284 if node != wdirid:
1275 if node != wdirid:
1285 raise error.LookupError(node, self.indexfile, _('no node'))
1276 raise error.LookupError(node, self.indexfile, _('no node'))
1286 except AttributeError:
1277 except AttributeError:
1287 # Fall through to pure code
1278 # Fall through to pure code
1288 pass
1279 pass
1289
1280
1290 if node == wdirid:
1281 if node == wdirid:
1291 for length in range(minlength, 41):
1282 for length in range(minlength, 41):
1292 prefix = hexnode[:length]
1283 prefix = hexnode[:length]
1293 if isvalid(prefix):
1284 if isvalid(prefix):
1294 return prefix
1285 return prefix
1295
1286
1296 for length in range(minlength, 41):
1287 for length in range(minlength, 41):
1297 prefix = hexnode[:length]
1288 prefix = hexnode[:length]
1298 if isvalid(prefix):
1289 if isvalid(prefix):
1299 return disambiguate(hexnode, length)
1290 return disambiguate(hexnode, length)
1300
1291
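# A brute-force rendering of the search above: grow the prefix until it
# matches exactly one known hex id and cannot be mistaken for the all-'f'
# wdir id (illustrative only, not the index-backed fast path):
def shortestprefix(hexnodes, hexnode, minlength=1):
    for length in range(minlength, 41):
        prefix = hexnode[:length]
        matches = [h for h in hexnodes if h.startswith(prefix)]
        maybewdir = all(c == 'f' for c in prefix)
        if matches == [hexnode] and not maybewdir:
            return prefix
    return hexnode

nodes = ['ab12' + '0' * 36, 'ab34' + '0' * 36]
assert shortestprefix(nodes, nodes[0]) == 'ab1'   # 'a' and 'ab' are ambiguous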
1301 def cmp(self, node, text):
1292 def cmp(self, node, text):
1302 """compare text with a given file revision
1293 """compare text with a given file revision
1303
1294
1304 returns True if text is different than what is stored.
1295 returns True if text is different than what is stored.
1305 """
1296 """
1306 p1, p2 = self.parents(node)
1297 p1, p2 = self.parents(node)
1307 return storageutil.hashrevisionsha1(text, p1, p2) != node
1298 return storageutil.hashrevisionsha1(text, p1, p2) != node
1308
1299
1309 def _cachesegment(self, offset, data):
1300 def _cachesegment(self, offset, data):
1310 """Add a segment to the revlog cache.
1301 """Add a segment to the revlog cache.
1311
1302
1312 Accepts an absolute offset and the data that is at that location.
1303 Accepts an absolute offset and the data that is at that location.
1313 """
1304 """
1314 o, d = self._chunkcache
1305 o, d = self._chunkcache
1315 # try to add to existing cache
1306 # try to add to existing cache
1316 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1307 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1317 self._chunkcache = o, d + data
1308 self._chunkcache = o, d + data
1318 else:
1309 else:
1319 self._chunkcache = offset, data
1310 self._chunkcache = offset, data
1320
1311
1321 def _readsegment(self, offset, length, df=None):
1312 def _readsegment(self, offset, length, df=None):
1322 """Load a segment of raw data from the revlog.
1313 """Load a segment of raw data from the revlog.
1323
1314
1324 Accepts an absolute offset, length to read, and an optional existing
1315 Accepts an absolute offset, length to read, and an optional existing
1325 file handle to read from.
1316 file handle to read from.
1326
1317
1327 If an existing file handle is passed, it will be seeked and the
1318 If an existing file handle is passed, it will be seeked and the
1328 original seek position will NOT be restored.
1319 original seek position will NOT be restored.
1329
1320
1330 Returns a str or buffer of raw byte data.
1321 Returns a str or buffer of raw byte data.
1331 """
1322 """
1332 # Cache data both forward and backward around the requested
1323 # Cache data both forward and backward around the requested
1333 # data, in a fixed size window. This helps speed up operations
1324 # data, in a fixed size window. This helps speed up operations
1334 # involving reading the revlog backwards.
1325 # involving reading the revlog backwards.
1335 cachesize = self._chunkcachesize
1326 cachesize = self._chunkcachesize
1336 realoffset = offset & ~(cachesize - 1)
1327 realoffset = offset & ~(cachesize - 1)
1337 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1328 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1338 - realoffset)
1329 - realoffset)
1339 with self._datareadfp(df) as df:
1330 with self._datareadfp(df) as df:
1340 df.seek(realoffset)
1331 df.seek(realoffset)
1341 d = df.read(reallength)
1332 d = df.read(reallength)
1342 self._cachesegment(realoffset, d)
1333 self._cachesegment(realoffset, d)
1343 if offset != realoffset or reallength != length:
1334 if offset != realoffset or reallength != length:
1344 return util.buffer(d, offset - realoffset, length)
1335 return util.buffer(d, offset - realoffset, length)
1345 return d
1336 return d
1346
1337
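# The rounding above in isolation: the requested byte range is widened to a
# cachesize-aligned window (cachesize must be a power of two), reading a bit
# before and after so that nearby requests hit the cache:
def alignedwindow(offset, length, cachesize):
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    return realoffset, reallength

ro, rl = alignedwindow(offset=70, length=10, cachesize=64)
assert (ro, rl) == (64, 64)              # window 64..127 covers bytes 70..79
assert ro <= 70 and 70 + 10 <= ro + rl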
1347 def _getsegment(self, offset, length, df=None):
1338 def _getsegment(self, offset, length, df=None):
1348 """Obtain a segment of raw data from the revlog.
1339 """Obtain a segment of raw data from the revlog.
1349
1340
1350 Accepts an absolute offset, length of bytes to obtain, and an
1341 Accepts an absolute offset, length of bytes to obtain, and an
1351 optional file handle to the already-opened revlog. If the file
1342 optional file handle to the already-opened revlog. If the file
1352 handle is used, its original seek position will not be preserved.
1343 handle is used, its original seek position will not be preserved.
1353
1344
1354 Requests for data may be returned from a cache.
1345 Requests for data may be returned from a cache.
1355
1346
1356 Returns a str or a buffer instance of raw byte data.
1347 Returns a str or a buffer instance of raw byte data.
1357 """
1348 """
1358 o, d = self._chunkcache
1349 o, d = self._chunkcache
1359 l = len(d)
1350 l = len(d)
1360
1351
1361 # is it in the cache?
1352 # is it in the cache?
1362 cachestart = offset - o
1353 cachestart = offset - o
1363 cacheend = cachestart + length
1354 cacheend = cachestart + length
1364 if cachestart >= 0 and cacheend <= l:
1355 if cachestart >= 0 and cacheend <= l:
1365 if cachestart == 0 and cacheend == l:
1356 if cachestart == 0 and cacheend == l:
1366 return d # avoid a copy
1357 return d # avoid a copy
1367 return util.buffer(d, cachestart, cacheend - cachestart)
1358 return util.buffer(d, cachestart, cacheend - cachestart)
1368
1359
1369 return self._readsegment(offset, length, df=df)
1360 return self._readsegment(offset, length, df=df)
1370
1361
1371 def _getsegmentforrevs(self, startrev, endrev, df=None):
1362 def _getsegmentforrevs(self, startrev, endrev, df=None):
1372 """Obtain a segment of raw data corresponding to a range of revisions.
1363 """Obtain a segment of raw data corresponding to a range of revisions.
1373
1364
1374 Accepts the start and end revisions and an optional already-open
1365 Accepts the start and end revisions and an optional already-open
1375 file handle to be used for reading. If the file handle is used, its
1366 file handle to be used for reading. If the file handle is used, its
1376 seek position will not be preserved.
1367 seek position will not be preserved.
1377
1368
1378 Requests for data may be satisfied by a cache.
1369 Requests for data may be satisfied by a cache.
1379
1370
1380 Returns a 2-tuple of (offset, data) for the requested range of
1371 Returns a 2-tuple of (offset, data) for the requested range of
1381 revisions. Offset is the integer offset from the beginning of the
1372 revisions. Offset is the integer offset from the beginning of the
1382 revlog and data is a str or buffer of the raw byte data.
1373 revlog and data is a str or buffer of the raw byte data.
1383
1374
1384 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1375 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1385 to determine where each revision's data begins and ends.
1376 to determine where each revision's data begins and ends.
1386 """
1377 """
1387 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1378 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1388 # (functions are expensive).
1379 # (functions are expensive).
1389 index = self.index
1380 index = self.index
1390 istart = index[startrev]
1381 istart = index[startrev]
1391 start = int(istart[0] >> 16)
1382 start = int(istart[0] >> 16)
1392 if startrev == endrev:
1383 if startrev == endrev:
1393 end = start + istart[1]
1384 end = start + istart[1]
1394 else:
1385 else:
1395 iend = index[endrev]
1386 iend = index[endrev]
1396 end = int(iend[0] >> 16) + iend[1]
1387 end = int(iend[0] >> 16) + iend[1]
1397
1388
1398 if self._inline:
1389 if self._inline:
1399 start += (startrev + 1) * self._io.size
1390 start += (startrev + 1) * self._io.size
1400 end += (endrev + 1) * self._io.size
1391 end += (endrev + 1) * self._io.size
1401 length = end - start
1392 length = end - start
1402
1393
1403 return start, self._getsegment(start, length, df=df)
1394 return start, self._getsegment(start, length, df=df)
1404
1395
1405 def _chunk(self, rev, df=None):
1396 def _chunk(self, rev, df=None):
1406 """Obtain a single decompressed chunk for a revision.
1397 """Obtain a single decompressed chunk for a revision.
1407
1398
1408 Accepts an integer revision and an optional already-open file handle
1399 Accepts an integer revision and an optional already-open file handle
1409 to be used for reading. If used, the seek position of the file will not
1400 to be used for reading. If used, the seek position of the file will not
1410 be preserved.
1401 be preserved.
1411
1402
1412 Returns a str holding uncompressed data for the requested revision.
1403 Returns a str holding uncompressed data for the requested revision.
1413 """
1404 """
1414 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1405 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1415
1406
1416 def _chunks(self, revs, df=None, targetsize=None):
1407 def _chunks(self, revs, df=None, targetsize=None):
1417 """Obtain decompressed chunks for the specified revisions.
1408 """Obtain decompressed chunks for the specified revisions.
1418
1409
1419 Accepts an iterable of numeric revisions that are assumed to be in
1410 Accepts an iterable of numeric revisions that are assumed to be in
1420 ascending order. Also accepts an optional already-open file handle
1411 ascending order. Also accepts an optional already-open file handle
1421 to be used for reading. If used, the seek position of the file will
1412 to be used for reading. If used, the seek position of the file will
1422 not be preserved.
1413 not be preserved.
1423
1414
1424 This function is similar to calling ``self._chunk()`` multiple times,
1415 This function is similar to calling ``self._chunk()`` multiple times,
1425 but is faster.
1416 but is faster.
1426
1417
1427 Returns a list with decompressed data for each requested revision.
1418 Returns a list with decompressed data for each requested revision.
1428 """
1419 """
1429 if not revs:
1420 if not revs:
1430 return []
1421 return []
1431 start = self.start
1422 start = self.start
1432 length = self.length
1423 length = self.length
1433 inline = self._inline
1424 inline = self._inline
1434 iosize = self._io.size
1425 iosize = self._io.size
1435 buffer = util.buffer
1426 buffer = util.buffer
1436
1427
1437 l = []
1428 l = []
1438 ladd = l.append
1429 ladd = l.append
1439
1430
1440 if not self._withsparseread:
1431 if not self._withsparseread:
1441 slicedchunks = (revs,)
1432 slicedchunks = (revs,)
1442 else:
1433 else:
1443 slicedchunks = deltautil.slicechunk(self, revs,
1434 slicedchunks = deltautil.slicechunk(self, revs,
1444 targetsize=targetsize)
1435 targetsize=targetsize)
1445
1436
1446 for revschunk in slicedchunks:
1437 for revschunk in slicedchunks:
1447 firstrev = revschunk[0]
1438 firstrev = revschunk[0]
1448 # Skip trailing revisions with empty diff
1439 # Skip trailing revisions with empty diff
1449 for lastrev in revschunk[::-1]:
1440 for lastrev in revschunk[::-1]:
1450 if length(lastrev) != 0:
1441 if length(lastrev) != 0:
1451 break
1442 break
1452
1443
1453 try:
1444 try:
1454 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1445 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1455 except OverflowError:
1446 except OverflowError:
1456 # issue4215 - we can't cache a run of chunks greater than
1447 # issue4215 - we can't cache a run of chunks greater than
1457 # 2G on Windows
1448 # 2G on Windows
1458 return [self._chunk(rev, df=df) for rev in revschunk]
1449 return [self._chunk(rev, df=df) for rev in revschunk]
1459
1450
1460 decomp = self.decompress
1451 decomp = self.decompress
1461 for rev in revschunk:
1452 for rev in revschunk:
1462 chunkstart = start(rev)
1453 chunkstart = start(rev)
1463 if inline:
1454 if inline:
1464 chunkstart += (rev + 1) * iosize
1455 chunkstart += (rev + 1) * iosize
1465 chunklength = length(rev)
1456 chunklength = length(rev)
1466 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1457 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1467
1458
1468 return l
1459 return l
1469
1460
1470 def _chunkclear(self):
1461 def _chunkclear(self):
1471 """Clear the raw chunk cache."""
1462 """Clear the raw chunk cache."""
1472 self._chunkcache = (0, '')
1463 self._chunkcache = (0, '')
1473
1464
1474 def deltaparent(self, rev):
1465 def deltaparent(self, rev):
1475 """return deltaparent of the given revision"""
1466 """return deltaparent of the given revision"""
1476 base = self.index[rev][3]
1467 base = self.index[rev][3]
1477 if base == rev:
1468 if base == rev:
1478 return nullrev
1469 return nullrev
1479 elif self._generaldelta:
1470 elif self._generaldelta:
1480 return base
1471 return base
1481 else:
1472 else:
1482 return rev - 1
1473 return rev - 1
1483
1474
1484 def issnapshot(self, rev):
1475 def issnapshot(self, rev):
1485 """tells whether rev is a snapshot
1476 """tells whether rev is a snapshot
1486 """
1477 """
1487 if rev == nullrev:
1478 if rev == nullrev:
1488 return True
1479 return True
1489 deltap = self.deltaparent(rev)
1480 deltap = self.deltaparent(rev)
1490 if deltap == nullrev:
1481 if deltap == nullrev:
1491 return True
1482 return True
1492 p1, p2 = self.parentrevs(rev)
1483 p1, p2 = self.parentrevs(rev)
1493 if deltap in (p1, p2):
1484 if deltap in (p1, p2):
1494 return False
1485 return False
1495 return self.issnapshot(deltap)
1486 return self.issnapshot(deltap)
1496
1487
1497 def snapshotdepth(self, rev):
1488 def snapshotdepth(self, rev):
1498 """number of snapshot in the chain before this one"""
1489 """number of snapshot in the chain before this one"""
1499 if not self.issnapshot(rev):
1490 if not self.issnapshot(rev):
1500 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1491 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1501 return len(self._deltachain(rev)[0]) - 1
1492 return len(self._deltachain(rev)[0]) - 1
1502
1493
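# The recursion above, standalone: a revision is a snapshot when its delta
# chain never leans on one of its own parents, with full texts (delta parent
# nullrev, -1) as the base case. Toy deltaparent/parentrevs tables:
def toyissnapshot(deltaparent, parentrevs, rev):
    if rev == -1:
        return True
    dp = deltaparent[rev]
    if dp == -1:
        return True                   # stored as full text
    if dp in parentrevs[rev]:
        return False                  # plain delta against a parent
    return toyissnapshot(deltaparent, parentrevs, dp)

deltaparent = {0: -1, 1: 0, 2: 0}
parentrevs = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
assert not toyissnapshot(deltaparent, parentrevs, 1)  # delta against parent 0
assert toyissnapshot(deltaparent, parentrevs, 2)      # snapshot on top of rev 0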
1503 def revdiff(self, rev1, rev2):
1494 def revdiff(self, rev1, rev2):
1504 """return or calculate a delta between two revisions
1495 """return or calculate a delta between two revisions
1505
1496
1506 The delta calculated is in binary form and is intended to be written to
1497 The delta calculated is in binary form and is intended to be written to
1507 revlog data directly. So this function needs raw revision data.
1498 revlog data directly. So this function needs raw revision data.
1508 """
1499 """
1509 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1500 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1510 return bytes(self._chunk(rev2))
1501 return bytes(self._chunk(rev2))
1511
1502
1512 return mdiff.textdiff(self.revision(rev1, raw=True),
1503 return mdiff.textdiff(self.revision(rev1, raw=True),
1513 self.revision(rev2, raw=True))
1504 self.revision(rev2, raw=True))
1514
1505
1515 def revision(self, nodeorrev, _df=None, raw=False):
1506 def revision(self, nodeorrev, _df=None, raw=False):
1516 """return an uncompressed revision of a given node or revision
1507 """return an uncompressed revision of a given node or revision
1517 number.
1508 number.
1518
1509
1519 _df - an existing file handle to read from. (internal-only)
1510 _df - an existing file handle to read from. (internal-only)
1520 raw - an optional argument specifying if the revision data is to be
1511 raw - an optional argument specifying if the revision data is to be
1521 treated as raw data when applying flag transforms. 'raw' should be set
1512 treated as raw data when applying flag transforms. 'raw' should be set
1522 to True when generating changegroups or in debug commands.
1513 to True when generating changegroups or in debug commands.
1523 """
1514 """
1524 if isinstance(nodeorrev, int):
1515 if isinstance(nodeorrev, int):
1525 rev = nodeorrev
1516 rev = nodeorrev
1526 node = self.node(rev)
1517 node = self.node(rev)
1527 else:
1518 else:
1528 node = nodeorrev
1519 node = nodeorrev
1529 rev = None
1520 rev = None
1530
1521
1531 cachedrev = None
1522 cachedrev = None
1532 flags = None
1523 flags = None
1533 rawtext = None
1524 rawtext = None
1534 if node == nullid:
1525 if node == nullid:
1535 return ""
1526 return ""
1536 if self._cache:
1527 if self._cache:
1537 if self._cache[0] == node:
1528 if self._cache[0] == node:
1538 # _cache only stores rawtext
1529 # _cache only stores rawtext
1539 if raw:
1530 if raw:
1540 return self._cache[2]
1531 return self._cache[2]
1541 # duplicated, but good for perf
1532 # duplicated, but good for perf
1542 if rev is None:
1533 if rev is None:
1543 rev = self.rev(node)
1534 rev = self.rev(node)
1544 if flags is None:
1535 if flags is None:
1545 flags = self.flags(rev)
1536 flags = self.flags(rev)
1546 # no extra flags set, no flag processor runs, text = rawtext
1537 # no extra flags set, no flag processor runs, text = rawtext
1547 if flags == REVIDX_DEFAULT_FLAGS:
1538 if flags == REVIDX_DEFAULT_FLAGS:
1548 return self._cache[2]
1539 return self._cache[2]
1549 # rawtext is reusable. need to run flag processor
1540 # rawtext is reusable. need to run flag processor
1550 rawtext = self._cache[2]
1541 rawtext = self._cache[2]
1551
1542
1552 cachedrev = self._cache[1]
1543 cachedrev = self._cache[1]
1553
1544
1554 # look up what we need to read
1545 # look up what we need to read
1555 if rawtext is None:
1546 if rawtext is None:
1556 if rev is None:
1547 if rev is None:
1557 rev = self.rev(node)
1548 rev = self.rev(node)
1558
1549
1559 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1550 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1560 if stopped:
1551 if stopped:
1561 rawtext = self._cache[2]
1552 rawtext = self._cache[2]
1562
1553
1563 # drop cache to save memory
1554 # drop cache to save memory
1564 self._cache = None
1555 self._cache = None
1565
1556
1566 targetsize = None
1557 targetsize = None
1567 rawsize = self.index[rev][2]
1558 rawsize = self.index[rev][2]
1568 if 0 <= rawsize:
1559 if 0 <= rawsize:
1569 targetsize = 4 * rawsize
1560 targetsize = 4 * rawsize
1570
1561
1571 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1562 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1572 if rawtext is None:
1563 if rawtext is None:
1573 rawtext = bytes(bins[0])
1564 rawtext = bytes(bins[0])
1574 bins = bins[1:]
1565 bins = bins[1:]
1575
1566
1576 rawtext = mdiff.patches(rawtext, bins)
1567 rawtext = mdiff.patches(rawtext, bins)
1577 self._cache = (node, rev, rawtext)
1568 self._cache = (node, rev, rawtext)
1578
1569
1579 if flags is None:
1570 if flags is None:
1580 if rev is None:
1571 if rev is None:
1581 rev = self.rev(node)
1572 rev = self.rev(node)
1582 flags = self.flags(rev)
1573 flags = self.flags(rev)
1583
1574
1584 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1575 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1585 if validatehash:
1576 if validatehash:
1586 self.checkhash(text, node, rev=rev)
1577 self.checkhash(text, node, rev=rev)
1587
1578
1588 return text
1579 return text
1589
1580
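# The core of the reconstruction above: take the full text at the start of
# the delta chain and apply each remaining chunk as a patch. A sketch with a
# trivial (start, end, replacement) tuple standing in for mdiff's binary
# delta format:
def applychain(chunks):
    text = chunks[0]                  # base snapshot
    for delta in chunks[1:]:
        text = toypatch(text, delta)  # mdiff.patches() in the real code
    return text

def toypatch(text, delta):
    start, end, data = delta          # splice replacement into current text
    return text[:start] + data + text[end:]

chunks = [b'abcdef', (1, 3, b'XY'), (0, 1, b'z')]
assert applychain(chunks) == b'zXYdef'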
1590 def hash(self, text, p1, p2):
1581 def hash(self, text, p1, p2):
1591 """Compute a node hash.
1582 """Compute a node hash.
1592
1583
1593 Available as a function so that subclasses can replace the hash
1584 Available as a function so that subclasses can replace the hash
1594 as needed.
1585 as needed.
1595 """
1586 """
1596 return storageutil.hashrevisionsha1(text, p1, p2)
1587 return storageutil.hashrevisionsha1(text, p1, p2)
1597
1588
1598 def _processflags(self, text, flags, operation, raw=False):
1589 def _processflags(self, text, flags, operation, raw=False):
1599 """Inspect revision data flags and applies transforms defined by
1590 """Inspect revision data flags and applies transforms defined by
1600 registered flag processors.
1591 registered flag processors.
1601
1592
1602 ``text`` - the revision data to process
1593 ``text`` - the revision data to process
1603 ``flags`` - the revision flags
1594 ``flags`` - the revision flags
1604 ``operation`` - the operation being performed (read or write)
1595 ``operation`` - the operation being performed (read or write)
1605 ``raw`` - an optional argument describing if the raw transform should be
1596 ``raw`` - an optional argument describing if the raw transform should be
1606 applied.
1597 applied.
1607
1598
1608 This method processes the flags in the order (or reverse order if
1599 This method processes the flags in the order (or reverse order if
1609 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1600 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1610 flag processors registered for present flags. The order of flags defined
1601 flag processors registered for present flags. The order of flags defined
1611 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1602 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1612
1603
1613 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1604 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1614 processed text and ``validatehash`` is a bool indicating whether the
1605 processed text and ``validatehash`` is a bool indicating whether the
1615 returned text should be checked for hash integrity.
1606 returned text should be checked for hash integrity.
1616
1607
1617 Note: If the ``raw`` argument is set, it has precedence over the
1608 Note: If the ``raw`` argument is set, it has precedence over the
1618 operation and will only update the value of ``validatehash``.
1609 operation and will only update the value of ``validatehash``.
1619 """
1610 """
1620 # fast path: no flag processors will run
1611 # fast path: no flag processors will run
1621 if flags == 0:
1612 if flags == 0:
1622 return text, True
1613 return text, True
1623 if operation not in ('read', 'write'):
1614 if operation not in ('read', 'write'):
1624 raise error.ProgrammingError(_("invalid '%s' operation") %
1615 raise error.ProgrammingError(_("invalid '%s' operation") %
1625 operation)
1616 operation)
1626 # Check all flags are known.
1617 # Check all flags are known.
1627 if flags & ~REVIDX_KNOWN_FLAGS:
1618 if flags & ~REVIDX_KNOWN_FLAGS:
1628 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1619 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1629 (flags & ~REVIDX_KNOWN_FLAGS))
1620 (flags & ~REVIDX_KNOWN_FLAGS))
1630 validatehash = True
1621 validatehash = True
1631 # Depending on the operation (read or write), the order might be
1622 # Depending on the operation (read or write), the order might be
1632 # reversed due to non-commutative transforms.
1623 # reversed due to non-commutative transforms.
1633 orderedflags = REVIDX_FLAGS_ORDER
1624 orderedflags = REVIDX_FLAGS_ORDER
1634 if operation == 'write':
1625 if operation == 'write':
1635 orderedflags = reversed(orderedflags)
1626 orderedflags = reversed(orderedflags)
1636
1627
1637 for flag in orderedflags:
1628 for flag in orderedflags:
1638 # If a flagprocessor has been registered for a known flag, apply the
1629 # If a flagprocessor has been registered for a known flag, apply the
1639 # related operation transform and update result tuple.
1630 # related operation transform and update result tuple.
1640 if flag & flags:
1631 if flag & flags:
1641 vhash = True
1632 vhash = True
1642
1633
1643 if flag not in self._flagprocessors:
1634 if flag not in self._flagprocessors:
1644 message = _("missing processor for flag '%#x'") % (flag)
1635 message = _("missing processor for flag '%#x'") % (flag)
1645 raise error.RevlogError(message)
1636 raise error.RevlogError(message)
1646
1637
1647 processor = self._flagprocessors[flag]
1638 processor = self._flagprocessors[flag]
1648 if processor is not None:
1639 if processor is not None:
1649 readtransform, writetransform, rawtransform = processor
1640 readtransform, writetransform, rawtransform = processor
1650
1641
1651 if raw:
1642 if raw:
1652 vhash = rawtransform(self, text)
1643 vhash = rawtransform(self, text)
1653 elif operation == 'read':
1644 elif operation == 'read':
1654 text, vhash = readtransform(self, text)
1645 text, vhash = readtransform(self, text)
1655 else: # write operation
1646 else: # write operation
1656 text, vhash = writetransform(self, text)
1647 text, vhash = writetransform(self, text)
1657 validatehash = validatehash and vhash
1648 validatehash = validatehash and vhash
1658
1649
1659 return text, validatehash
1650 return text, validatehash
1660
1651
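# The ordering rule above in miniature: processors run in REVIDX_FLAGS_ORDER
# on read and in reverse on write, so a write followed by a read round-trips
# even when the transforms don't commute. Toy flags and processors,
# illustrative only:
FLAG_A, FLAG_B = 1, 2
PROCESSORS = {
    FLAG_A: (lambda t: t[::-1], lambda t: t[::-1]),   # (read, write)
    FLAG_B: (lambda t: t[1:], lambda t: b'#' + t),
}
ORDER = [FLAG_A, FLAG_B]

def toyprocessflags(text, flags, operation):
    order = reversed(ORDER) if operation == 'write' else ORDER
    for flag in order:
        if flag & flags:
            read, write = PROCESSORS[flag]
            text = read(text) if operation == 'read' else write(text)
    return text

stored = toyprocessflags(b'data', FLAG_A | FLAG_B, 'write')
assert toyprocessflags(stored, FLAG_A | FLAG_B, 'read') == b'data'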
1661 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1652 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1662 """Check node hash integrity.
1653 """Check node hash integrity.
1663
1654
1664 Available as a function so that subclasses can extend hash mismatch
1655 Available as a function so that subclasses can extend hash mismatch
1665 behaviors as needed.
1656 behaviors as needed.
1666 """
1657 """
1667 try:
1658 try:
1668 if p1 is None and p2 is None:
1659 if p1 is None and p2 is None:
1669 p1, p2 = self.parents(node)
1660 p1, p2 = self.parents(node)
1670 if node != self.hash(text, p1, p2):
1661 if node != self.hash(text, p1, p2):
1671 revornode = rev
1662 revornode = rev
1672 if revornode is None:
1663 if revornode is None:
1673 revornode = templatefilters.short(hex(node))
1664 revornode = templatefilters.short(hex(node))
1674 raise error.RevlogError(_("integrity check failed on %s:%s")
1665 raise error.RevlogError(_("integrity check failed on %s:%s")
1675 % (self.indexfile, pycompat.bytestr(revornode)))
1666 % (self.indexfile, pycompat.bytestr(revornode)))
1676 except error.RevlogError:
1667 except error.RevlogError:
1677 if self._censorable and storageutil.iscensoredtext(text):
1668 if self._censorable and storageutil.iscensoredtext(text):
1678 raise error.CensoredNodeError(self.indexfile, node, text)
1669 raise error.CensoredNodeError(self.indexfile, node, text)
1679 raise
1670 raise
1680
1671
1681 def _enforceinlinesize(self, tr, fp=None):
1672 def _enforceinlinesize(self, tr, fp=None):
1682 """Check if the revlog is too big for inline and convert if so.
1673 """Check if the revlog is too big for inline and convert if so.
1683
1674
1684 This should be called after revisions are added to the revlog. If the
1675 This should be called after revisions are added to the revlog. If the
1685 revlog has grown too large to be an inline revlog, it will convert it
1676 revlog has grown too large to be an inline revlog, it will convert it
1686 to use multiple index and data files.
1677 to use multiple index and data files.
1687 """
1678 """
1688 tiprev = len(self) - 1
1679 tiprev = len(self) - 1
1689 if (not self._inline or
1680 if (not self._inline or
1690 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1681 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1691 return
1682 return
1692
1683
1693 trinfo = tr.find(self.indexfile)
1684 trinfo = tr.find(self.indexfile)
1694 if trinfo is None:
1685 if trinfo is None:
1695 raise error.RevlogError(_("%s not found in the transaction")
1686 raise error.RevlogError(_("%s not found in the transaction")
1696 % self.indexfile)
1687 % self.indexfile)
1697
1688
1698 trindex = trinfo[2]
1689 trindex = trinfo[2]
1699 if trindex is not None:
1690 if trindex is not None:
1700 dataoff = self.start(trindex)
1691 dataoff = self.start(trindex)
1701 else:
1692 else:
1702 # revlog was stripped at start of transaction, use all leftover data
1693 # revlog was stripped at start of transaction, use all leftover data
1703 trindex = len(self) - 1
1694 trindex = len(self) - 1
1704 dataoff = self.end(tiprev)
1695 dataoff = self.end(tiprev)
1705
1696
1706 tr.add(self.datafile, dataoff)
1697 tr.add(self.datafile, dataoff)
1707
1698
1708 if fp:
1699 if fp:
1709 fp.flush()
1700 fp.flush()
1710 fp.close()
1701 fp.close()
1711
1702
1712 with self._datafp('w') as df:
1703 with self._datafp('w') as df:
1713 for r in self:
1704 for r in self:
1714 df.write(self._getsegmentforrevs(r, r)[1])
1705 df.write(self._getsegmentforrevs(r, r)[1])
1715
1706
1716 with self._indexfp('w') as fp:
1707 with self._indexfp('w') as fp:
1717 self.version &= ~FLAG_INLINE_DATA
1708 self.version &= ~FLAG_INLINE_DATA
1718 self._inline = False
1709 self._inline = False
1719 io = self._io
1710 io = self._io
1720 for i in self:
1711 for i in self:
1721 e = io.packentry(self.index[i], self.node, self.version, i)
1712 e = io.packentry(self.index[i], self.node, self.version, i)
1722 fp.write(e)
1713 fp.write(e)
1723
1714
1724 # the temp file replaces the real index when we exit the context
1715 # the temp file replaces the real index when we exit the context
1725 # manager
1716 # manager
1726
1717
1727 tr.replace(self.indexfile, trindex * self._io.size)
1718 tr.replace(self.indexfile, trindex * self._io.size)
1728 self._chunkclear()
1719 self._chunkclear()
1729
1720
1730 def _nodeduplicatecallback(self, transaction, node):
1721 def _nodeduplicatecallback(self, transaction, node):
1731 """called when trying to add a node already stored.
1722 """called when trying to add a node already stored.
1732 """
1723 """
1733
1724
1734 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1725 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1735 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1726 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1736 """add a revision to the log
1727 """add a revision to the log
1737
1728
1738 text - the revision data to add
1729 text - the revision data to add
1739 transaction - the transaction object used for rollback
1730 transaction - the transaction object used for rollback
1740 link - the linkrev data to add
1731 link - the linkrev data to add
1741 p1, p2 - the parent nodeids of the revision
1732 p1, p2 - the parent nodeids of the revision
1742 cachedelta - an optional precomputed delta
1733 cachedelta - an optional precomputed delta
1743 node - nodeid of revision; typically node is not specified, and it is
1734 node - nodeid of revision; typically node is not specified, and it is
1744 computed by default as hash(text, p1, p2), however subclasses might
1735 computed by default as hash(text, p1, p2), however subclasses might
1745 use a different hashing method (and override checkhash() in that case)
1736 use a different hashing method (and override checkhash() in that case)
1746 flags - the known flags to set on the revision
1737 flags - the known flags to set on the revision
1747 deltacomputer - an optional deltacomputer instance shared between
1738 deltacomputer - an optional deltacomputer instance shared between
1748 multiple calls
1739 multiple calls
1749 """
1740 """
1750 if link == nullrev:
1741 if link == nullrev:
1751 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1742 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1752 % self.indexfile)
1743 % self.indexfile)
1753
1744
1754 if flags:
1745 if flags:
1755 node = node or self.hash(text, p1, p2)
1746 node = node or self.hash(text, p1, p2)
1756
1747
1757 rawtext, validatehash = self._processflags(text, flags, 'write')
1748 rawtext, validatehash = self._processflags(text, flags, 'write')
1758
1749
1759 # If the flag processor modifies the revision data, ignore any provided
1750 # If the flag processor modifies the revision data, ignore any provided
1760 # cachedelta.
1751 # cachedelta.
1761 if rawtext != text:
1752 if rawtext != text:
1762 cachedelta = None
1753 cachedelta = None
1763
1754
1764 if len(rawtext) > _maxentrysize:
1755 if len(rawtext) > _maxentrysize:
1765 raise error.RevlogError(
1756 raise error.RevlogError(
1766 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1757 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1767 % (self.indexfile, len(rawtext)))
1758 % (self.indexfile, len(rawtext)))
1768
1759
1769 node = node or self.hash(rawtext, p1, p2)
1760 node = node or self.hash(rawtext, p1, p2)
1770 if node in self.nodemap:
1761 if node in self.nodemap:
1771 return node
1762 return node
1772
1763
1773 if validatehash:
1764 if validatehash:
1774 self.checkhash(rawtext, node, p1=p1, p2=p2)
1765 self.checkhash(rawtext, node, p1=p1, p2=p2)
1775
1766
1776 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1767 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1777 flags, cachedelta=cachedelta,
1768 flags, cachedelta=cachedelta,
1778 deltacomputer=deltacomputer)
1769 deltacomputer=deltacomputer)
1779
1770
1780 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1771 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1781 cachedelta=None, deltacomputer=None):
1772 cachedelta=None, deltacomputer=None):
1782 """add a raw revision with known flags, node and parents
1773 """add a raw revision with known flags, node and parents
1783 useful when reusing a revision not stored in this revlog (e.g. received
1774 useful when reusing a revision not stored in this revlog (e.g. received
1784 over the wire, or read from an external bundle).
1775 over the wire, or read from an external bundle).
1785 """
1776 """
1786 dfh = None
1777 dfh = None
1787 if not self._inline:
1778 if not self._inline:
1788 dfh = self._datafp("a+")
1779 dfh = self._datafp("a+")
1789 ifh = self._indexfp("a+")
1780 ifh = self._indexfp("a+")
1790 try:
1781 try:
1791 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1782 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1792 flags, cachedelta, ifh, dfh,
1783 flags, cachedelta, ifh, dfh,
1793 deltacomputer=deltacomputer)
1784 deltacomputer=deltacomputer)
1794 finally:
1785 finally:
1795 if dfh:
1786 if dfh:
1796 dfh.close()
1787 dfh.close()
1797 ifh.close()
1788 ifh.close()
1798
1789
1799 def compress(self, data):
1790 def compress(self, data):
1800 """Generate a possibly-compressed representation of data."""
1791 """Generate a possibly-compressed representation of data."""
1801 if not data:
1792 if not data:
1802 return '', data
1793 return '', data
1803
1794
1804 compressed = self._compressor.compress(data)
1795 compressed = self._compressor.compress(data)
1805
1796
1806 if compressed:
1797 if compressed:
1807 # The revlog compressor added the header in the returned data.
1798 # The revlog compressor added the header in the returned data.
1808 return '', compressed
1799 return '', compressed
1809
1800
1810 if data[0:1] == '\0':
1801 if data[0:1] == '\0':
1811 return '', data
1802 return '', data
1812 return 'u', data
1803 return 'u', data
1813
1804
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

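    # A minimal sketch of the round trip between compress() and decompress()
    # above (illustrative only; ``rl`` stands for an open revlog instance and
    # is an assumption, not something defined in this module):
    #
    #   header, packed = rl.compress(b'some fulltext')
    #   # header is '' when the compressor embeds its own header (e.g. 'x'
    #   # for zlib) or the data starts with '\0'; it is 'u' when the chunk
    #   # is stored uncompressed. The stored chunk is always header + packed.
    #   assert rl.decompress(header + packed) == b'some fulltext'
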
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

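    # An illustrative sketch of the a+ workaround described above (the file
    # name is hypothetical and not part of revlog): reposition to EOF before
    # writing on a handle that may have been used for reading.
    #
    #   import os
    #   with open('example.i', 'a+b') as fh:
    #       fh.read()                # handle transitions to "read" state
    #       fh.seek(0, os.SEEK_END)  # explicit seek before the write, so
    #       fh.write(b'new entry')   # the append lands at EOF everywhere
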
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should already be in our log; the rest
        are each against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

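    # A sketch of the tuple shape addgroup() consumes (the surrounding names
    # are hypothetical; a real caller receives these from a changegroup, and
    # ``cl.rev`` maps a changelog node to its revision number):
    #
    #   deltas = [
    #       # (node, p1, p2, linknode, deltabase, delta, flags)
    #       (newnode, p1node, p2node, clnode, basenode, deltabytes, 0),
    #   ]
    #   added = rl.addgroup(deltas, cl.rev, tr,
    #                       addrevisioncb=lambda rl, node: seen.append(node))
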
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add

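    # The heuristic above can be exercised directly; a doctest-style sketch
    # with a hand-built full-replacement delta (the ">lll" header packs the
    # start, end, and length of the replacement text):
    #
    #   >>> import struct
    #   >>> hlen = struct.calcsize(">lll")
    #   >>> delta = struct.pack(">lll", 0, 10, 14) + b"\1\ncensored:..."
    #   >>> delta[hlen:hlen + 11] == b"\1\ncensored:"
    #   True
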
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

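    # Illustrative usage (``rl``, ``minlink`` and ``tr`` are assumptions; see
    # strip() just below for the truncation half):
    #
    #   strippoint, broken = rl.getstrippoint(minlink)
    #   # revs in ``broken`` have linkrevs that the strip will invalidate;
    #   # everything at strippoint and later is about to be truncated.
    #   rl.strip(minlink, tr)
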
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        """Check stored file sizes against what the index expects.

        Returns a (dd, di) tuple: ``dd`` is the number of bytes the data
        file length is off by, and ``di`` is the number of extra bytes in
        the index file. Both are zero when storage is consistent.
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False, deltaprevious=False):
        if nodesorder not in ('nodes', 'storage', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        frev = self.rev
        fnode = self.node

        if nodesorder == 'nodes':
            revs = [frev(n) for n in nodes]
        elif nodesorder == 'storage':
            revs = sorted(frev(n) for n in nodes)
        else:
            assert self._generaldelta
            revs = set(frev(n) for n in nodes)
            revs = dagop.linearize(revs, self.parentrevs)

        prevrev = None

        if deltaprevious or assumehaveparentrevisions:
            prevrev = self.parentrevs(revs[0])[0]

        # Set of revs available to delta against.
        available = set()

        for rev in revs:
            if rev == nullrev:
                continue

            node = fnode(rev)
            deltaparentrev = self.deltaparent(rev)
            p1rev, p2rev = self.parentrevs(rev)

            # Forced delta against previous mode.
            if deltaprevious:
                baserev = prevrev

            # Revlog is configured to use full snapshots. Stick to that.
            elif not self._storedeltachains:
                baserev = nullrev

            # There is a delta in storage. We try to use that because it
            # amounts to effectively copying data from storage and is
            # therefore the fastest.
            elif deltaparentrev != nullrev:
                # Base revision was already emitted in this group. We can
                # always safely use the delta.
                if deltaparentrev in available:
                    baserev = deltaparentrev

                # Base revision is a parent that hasn't been emitted already.
                # Use it if we can assume the receiver has the parent revision.
                elif (assumehaveparentrevisions
                      and deltaparentrev in (p1rev, p2rev)):
                    baserev = deltaparentrev

                # No guarantee the receiver has the delta parent. Send delta
                # against last revision (if possible), which in the common case
                # should be similar enough to this revision that the delta is
                # reasonable.
                elif prevrev is not None:
                    baserev = prevrev
                else:
                    baserev = nullrev

            # Storage has a fulltext revision.

            # Let's use the previous revision, which is as good a guess as any.
            # There is definitely room to improve this logic.
            elif prevrev is not None:
                baserev = prevrev
            else:
                baserev = nullrev

            # But we can't actually use our chosen delta base for whatever
            # reason. Reset to fulltext.
            if baserev != nullrev and not self.candelta(baserev, rev):
                baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if revisiondata:
                if self.iscensored(baserev) or self.iscensored(rev):
                    try:
                        revision = self.revision(node, raw=True)
                    except error.CensoredNodeError as e:
                        revision = e.tombstone

                    if baserev != nullrev:
                        baserevisionsize = self.rawsize(baserev)

                elif baserev == nullrev and not deltaprevious:
                    revision = self.revision(node, raw=True)
                    available.add(rev)
                else:
                    delta = self.revdiff(baserev, rev)
                    available.add(rev)

            yield revlogrevisiondelta(
                node=node,
                p1node=fnode(p1rev),
                p2node=fnode(p2rev),
                basenode=fnode(baserev),
                flags=self.flags(rev),
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev

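    # A sketch of consuming emitrevisions() (``rl`` and ``nodes`` are
    # assumptions; the consumer callbacks are hypothetical). Each emitted
    # object is a revlogrevisiondelta as constructed above:
    #
    #   for rd in rl.emitrevisions(nodes, revisiondata=True,
    #                              assumehaveparentrevisions=True):
    #       if rd.revision is not None:
    #           consume_fulltext(rd.node, rd.revision)
    #       else:
    #           consume_delta(rd.node, rd.basenode, rd.delta)
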
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the destination
            revlog would not select the same revisions for the delta. This is
            the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the same
            revisions for the delta. This mode strikes a balance between speed
            and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of execution.
            This mode can be used to recompute deltas (e.g. if the diff/delta
            algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        If it is None, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

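    # Illustrative usage (``src``, ``dst`` and transaction ``tr`` are
    # assumptions): recompute every delta while copying, e.g. after the
    # delta algorithm changed.
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
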
    def censorrevision(self, node, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        rev = self.rev(node)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(rev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Using two files instead of one makes it easy to rewrite entry-by-entry
        idxread = self.opener(self.indexfile, 'r')
        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
        if self.version & FLAG_INLINE_DATA:
            dataread, datawrite = idxread, idxwrite
        else:
            dataread = self.opener(self.datafile, 'r')
            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

        # Copy all revlog data up to the entry to be censored.
        offset = self.start(rev)

        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
            flags = gettype(offlags)
            if newdata:
                comp, uncomp, nflags = newdata
                flags |= nflags
            offlags = offset_type(newoffs, flags)
            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
            idxwrite.write(self._io.packentry(e, None, self.version, r))
            idxread.seek(self._io.size, 1)

        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
            """Write the given fulltext with the given data offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = self.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            dataread.seek(self.length(r), 1)
            return newcomp

        # Rewrite censored entry with (padded) tombstone data.
        pad = ' ' * (self.rawsize(rev) - len(tombstone))
        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

        # Rewrite all following filelog revisions fixing up offsets and deltas.
        for srev in pycompat.xrange(rev + 1, len(self)):
            if rev in self.parentrevs(srev):
                # Immediate children of censored node must be re-added as
                # fulltext.
                try:
                    revdata = self.revision(srev)
                except error.CensoredNodeError as e:
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = self.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen

        idxread.close()
        idxwrite.close()
        if dataread is not idxread:
            dataread.close()
            datawrite.close()

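    # Illustrative usage (the repository objects are assumptions): filelogs
    # are revlogs, so a file revision can be censored in place.
    #
    #   fl = repo.file('secrets.txt')
    #   fl.censorrevision(badnode, tombstone=b'removed for legal reasons')
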
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                        | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()               | 0      | 0      | 0     | not 0
            #  renamed()             | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()     | L1     | L1     | L1    | L1
            # size()        | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext)  | L2     | L2     | L2    | L2
            # len(text)     | L2     | L2     | L2    | L3
            # len(read())   | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

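    # A sketch of a caller (the ``state`` keys mirror the ones read above;
    # ``rl`` and ``ui`` are assumptions):
    #
    #   state = {'expectedversion': 1, 'erroroncensored': False,
    #            'skipflags': 0}
    #   for problem in rl.verifyintegrity(state):
    #       ui.warn(problem.error or problem.warning)
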
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
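
    # Illustrative usage (``rl`` is an assumption): request only the values
    # needed, since e.g. trackedsize scans every revision.
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   count, size = info['revisionscount'], info['storedsize']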