revlog: return offset from _chunkraw()...

Gregory Szorc
r27649:6446e9b3 default
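
The change is a small API adjustment: revlog._chunkraw() now returns a 2-tuple whose second element is the raw data, rather than returning the data alone, and the one call site in perf.py that keeps the result is updated to take element [1]. A sketch of the calling convention before and after, inferred from the hunk below (illustration, not part of the patch):

    # before: _chunkraw() returned the raw buffer directly
    data = r._chunkraw(chain[0], chain[-1])
    # after: it returns (offset, data); callers that only need the bytes
    # now index [1] (or unpack both values)
    offset, data = r._chunkraw(chain[0], chain[-1])
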
contrib/perf.py
@@ -1,784 +1,784 @@
# perf.py - performance test routines
'''helper extension to measure performance'''

from mercurial import cmdutil, scmutil, util, commands, obsolete
from mercurial import repoview, branchmap, merge, copies, error, revlog
from mercurial import mdiff
import time, os, sys
import random
import functools

formatteropts = commands.formatteropts
revlogopts = commands.debugrevlogopts

cmdtable = {}
command = cmdutil.command(cmdtable)

def getlen(ui):
    if ui.configbool("perf", "stub"):
        return lambda x: 1
    return len

def gettimer(ui, opts=None):
    """return a timer function and formatter: (timer, formatter)

    This function exists to gather the creation of formatter in a single
    place instead of duplicating it in all performance commands."""

    # enforce an idle period before execution to counteract power management
    # experimental config: perf.presleep
    time.sleep(ui.configint("perf", "presleep", 1))

    if opts is None:
        opts = {}
    # redirect all to stderr
    ui = ui.copy()
    ui.fout = ui.ferr
    # get a formatter
    fm = ui.formatter('perf', opts)
    # stub function, runs code only once instead of in a loop
    # experimental config: perf.stub
    if ui.configbool("perf", "stub"):
        return functools.partial(stub_timer, fm), fm
    return functools.partial(_timer, fm), fm

def stub_timer(fm, func, title=None):
    func()

def _timer(fm, func, title=None):
    results = []
    begin = time.time()
    count = 0
    while True:
        ostart = os.times()
        cstart = time.time()
        r = func()
        cstop = time.time()
        ostop = os.times()
        count += 1
        a, b = ostart, ostop
        results.append((cstop - cstart, b[0] - a[0], b[1]-a[1]))
        if cstop - begin > 3 and count >= 100:
            break
        if cstop - begin > 10 and count >= 3:
            break

    fm.startitem()

    if title:
        fm.write('title', '! %s\n', title)
    if r:
        fm.write('result', '! result: %s\n', r)
    m = min(results)
    fm.plain('!')
    fm.write('wall', ' wall %f', m[0])
    fm.write('comb', ' comb %f', m[1] + m[2])
    fm.write('user', ' user %f', m[1])
    fm.write('sys', ' sys %f', m[2])
    fm.write('count', ' (best of %d)', count)
    fm.plain('\n')

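Every benchmark command below follows the same pattern established here: obtain a (timer, formatter) pair from gettimer(), hand the code under test to timer() as a callable, and close the formatter. A minimal sketch of that shape (the command name is hypothetical, not part of perf.py):

    @command('perfnoop', formatteropts)
    def perfnoop(ui, repo, **opts):
        timer, fm = gettimer(ui, opts)
        timer(lambda: None)   # the callable being measured
        fm.end()
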
@command('perfwalk', formatteropts)
def perfwalk(ui, repo, *pats, **opts):
    timer, fm = gettimer(ui, opts)
    try:
        m = scmutil.match(repo[None], pats, {})
        timer(lambda: len(list(repo.dirstate.walk(m, [], True, False))))
    except Exception:
        try:
            m = scmutil.match(repo[None], pats, {})
            timer(lambda: len([b for a, b, c in repo.dirstate.statwalk([], m)]))
        except Exception:
            timer(lambda: len(list(cmdutil.walk(repo, pats, {}))))
    fm.end()

@command('perfannotate', formatteropts)
def perfannotate(ui, repo, f, **opts):
    timer, fm = gettimer(ui, opts)
    fc = repo['.'][f]
    timer(lambda: len(fc.annotate(True)))
    fm.end()

@command('perfstatus',
         [('u', 'unknown', False,
           'ask status to look for unknown files')] + formatteropts)
def perfstatus(ui, repo, **opts):
    #m = match.always(repo.root, repo.getcwd())
    #timer(lambda: sum(map(len, repo.dirstate.status(m, [], False, False,
    #                                                False))))
    timer, fm = gettimer(ui, opts)
    timer(lambda: sum(map(len, repo.status(unknown=opts['unknown']))))
    fm.end()

@command('perfaddremove', formatteropts)
def perfaddremove(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    try:
        oldquiet = repo.ui.quiet
        repo.ui.quiet = True
        matcher = scmutil.match(repo[None])
        timer(lambda: scmutil.addremove(repo, matcher, "", dry_run=True))
    finally:
        repo.ui.quiet = oldquiet
    fm.end()

def clearcaches(cl):
    # behave somewhat consistently across internal API changes
    if util.safehasattr(cl, 'clearcaches'):
        cl.clearcaches()
    elif util.safehasattr(cl, '_nodecache'):
        from mercurial.node import nullid, nullrev
        cl._nodecache = {nullid: nullrev}
        cl._nodepos = None

@command('perfheads', formatteropts)
def perfheads(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
    def d():
        len(cl.headrevs())
        clearcaches(cl)
    timer(d)
    fm.end()

@command('perftags', formatteropts)
def perftags(ui, repo, **opts):
    import mercurial.changelog
    import mercurial.manifest
    timer, fm = gettimer(ui, opts)
    def t():
        repo.changelog = mercurial.changelog.changelog(repo.svfs)
        repo.manifest = mercurial.manifest.manifest(repo.svfs)
        repo._tags = None
        return len(repo.tags())
    timer(t)
    fm.end()

@command('perfancestors', formatteropts)
def perfancestors(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    heads = repo.changelog.headrevs()
    def d():
        for a in repo.changelog.ancestors(heads):
            pass
    timer(d)
    fm.end()

@command('perfancestorset', formatteropts)
def perfancestorset(ui, repo, revset, **opts):
    timer, fm = gettimer(ui, opts)
    revs = repo.revs(revset)
    heads = repo.changelog.headrevs()
    def d():
        s = repo.changelog.ancestors(heads)
        for rev in revs:
            rev in s
    timer(d)
    fm.end()

@command('perfdirs', formatteropts)
def perfdirs(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    'a' in dirstate
    def d():
        dirstate.dirs()
        del dirstate._dirs
    timer(d)
    fm.end()

@command('perfdirstate', formatteropts)
def perfdirstate(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    "a" in repo.dirstate
    def d():
        repo.dirstate.invalidate()
        "a" in repo.dirstate
    timer(d)
    fm.end()

@command('perfdirstatedirs', formatteropts)
def perfdirstatedirs(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    "a" in repo.dirstate
    def d():
        "a" in repo.dirstate._dirs
        del repo.dirstate._dirs
    timer(d)
    fm.end()

@command('perfdirstatefoldmap', formatteropts)
def perfdirstatefoldmap(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    'a' in dirstate
    def d():
        dirstate._filefoldmap.get('a')
        del dirstate._filefoldmap
    timer(d)
    fm.end()

@command('perfdirfoldmap', formatteropts)
def perfdirfoldmap(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    dirstate = repo.dirstate
    'a' in dirstate
    def d():
        dirstate._dirfoldmap.get('a')
        del dirstate._dirfoldmap
        del dirstate._dirs
    timer(d)
    fm.end()

@command('perfdirstatewrite', formatteropts)
def perfdirstatewrite(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    ds = repo.dirstate
    "a" in ds
    def d():
        ds._dirty = True
        ds.write(repo.currenttransaction())
    timer(d)
    fm.end()

@command('perfmergecalculate',
         [('r', 'rev', '.', 'rev to merge against')] + formatteropts)
def perfmergecalculate(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    wctx = repo[None]
    rctx = scmutil.revsingle(repo, rev, rev)
    ancestor = wctx.ancestor(rctx)
    # we don't want working dir files to be stat'd in the benchmark, so prime
    # that cache
    wctx.dirty()
    def d():
        # acceptremote is True because we don't want prompts in the middle of
        # our benchmark
        merge.calculateupdates(repo, wctx, rctx, [ancestor], False, False,
                               acceptremote=True, followcopies=True)
    timer(d)
    fm.end()

@command('perfpathcopies', [], "REV REV")
def perfpathcopies(ui, repo, rev1, rev2, **opts):
    timer, fm = gettimer(ui, opts)
    ctx1 = scmutil.revsingle(repo, rev1, rev1)
    ctx2 = scmutil.revsingle(repo, rev2, rev2)
    def d():
        copies.pathcopies(ctx1, ctx2)
    timer(d)
    fm.end()

@command('perfmanifest', [], 'REV')
def perfmanifest(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    ctx = scmutil.revsingle(repo, rev, rev)
    t = ctx.manifestnode()
    def d():
        repo.manifest.clearcaches()
        repo.manifest.read(t)
    timer(d)
    fm.end()

@command('perfchangeset', formatteropts)
def perfchangeset(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    n = repo[rev].node()
    def d():
        repo.changelog.read(n)
        #repo.changelog._cache = None
    timer(d)
    fm.end()

@command('perfindex', formatteropts)
def perfindex(ui, repo, **opts):
    import mercurial.revlog
    timer, fm = gettimer(ui, opts)
    mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
    n = repo["tip"].node()
    def d():
        cl = mercurial.revlog.revlog(repo.svfs, "00changelog.i")
        cl.rev(n)
    timer(d)
    fm.end()

@command('perfstartup', formatteropts)
def perfstartup(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    cmd = sys.argv[0]
    def d():
        if os.name != 'nt':
            os.system("HGRCPATH= %s version -q > /dev/null" % cmd)
        else:
            os.environ['HGRCPATH'] = ''
            os.system("%s version -q > NUL" % cmd)
    timer(d)
    fm.end()

@command('perfparents', formatteropts)
def perfparents(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    # control the number of commits perfparents iterates over
    # experimental config: perf.parentscount
    count = ui.configint("perf", "parentscount", 1000)
    if len(repo.changelog) < count:
        raise error.Abort("repo needs %d commits for this test" % count)
    repo = repo.unfiltered()
    nl = [repo.changelog.node(i) for i in xrange(count)]
    def d():
        for n in nl:
            repo.changelog.parents(n)
    timer(d)
    fm.end()

@command('perfctxfiles', formatteropts)
def perfctxfiles(ui, repo, x, **opts):
    x = int(x)
    timer, fm = gettimer(ui, opts)
    def d():
        len(repo[x].files())
    timer(d)
    fm.end()

@command('perfrawfiles', formatteropts)
def perfrawfiles(ui, repo, x, **opts):
    x = int(x)
    timer, fm = gettimer(ui, opts)
    cl = repo.changelog
    def d():
        len(cl.read(x)[3])
    timer(d)
    fm.end()

@command('perflookup', formatteropts)
def perflookup(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    timer(lambda: len(repo.lookup(rev)))
    fm.end()

@command('perfrevrange', formatteropts)
def perfrevrange(ui, repo, *specs, **opts):
    timer, fm = gettimer(ui, opts)
    revrange = scmutil.revrange
    timer(lambda: len(revrange(repo, specs)))
    fm.end()

@command('perfnodelookup', formatteropts)
def perfnodelookup(ui, repo, rev, **opts):
    timer, fm = gettimer(ui, opts)
    import mercurial.revlog
    mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg
    n = repo[rev].node()
    cl = mercurial.revlog.revlog(repo.svfs, "00changelog.i")
    def d():
        cl.rev(n)
        clearcaches(cl)
    timer(d)
    fm.end()

@command('perflog',
         [('', 'rename', False, 'ask log to follow renames')] + formatteropts)
def perflog(ui, repo, rev=None, **opts):
    if rev is None:
        rev=[]
    timer, fm = gettimer(ui, opts)
    ui.pushbuffer()
    timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
                               copies=opts.get('rename')))
    ui.popbuffer()
    fm.end()

@command('perfmoonwalk', formatteropts)
def perfmoonwalk(ui, repo, **opts):
    """benchmark walking the changelog backwards

    This also loads the changelog data for each revision in the changelog.
    """
    timer, fm = gettimer(ui, opts)
    def moonwalk():
        for i in xrange(len(repo), -1, -1):
            ctx = repo[i]
            ctx.branch() # read changelog data (in addition to the index)
    timer(moonwalk)
    fm.end()

@command('perftemplating', formatteropts)
def perftemplating(ui, repo, rev=None, **opts):
    if rev is None:
        rev=[]
    timer, fm = gettimer(ui, opts)
    ui.pushbuffer()
    timer(lambda: commands.log(ui, repo, rev=rev, date='', user='',
                               template='{date|shortdate} [{rev}:{node|short}]'
                               ' {author|person}: {desc|firstline}\n'))
    ui.popbuffer()
    fm.end()

@command('perfcca', formatteropts)
def perfcca(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
    fm.end()

@command('perffncacheload', formatteropts)
def perffncacheload(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    s = repo.store
    def d():
        s.fncache._load()
    timer(d)
    fm.end()

@command('perffncachewrite', formatteropts)
def perffncachewrite(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    s = repo.store
    s.fncache._load()
    lock = repo.lock()
    tr = repo.transaction('perffncachewrite')
    def d():
        s.fncache._dirty = True
        s.fncache.write(tr)
    timer(d)
    lock.release()
    tr.close()
    fm.end()

@command('perffncacheencode', formatteropts)
def perffncacheencode(ui, repo, **opts):
    timer, fm = gettimer(ui, opts)
    s = repo.store
    s.fncache._load()
    def d():
        for p in s.fncache.entries:
            s.encode(p)
    timer(d)
    fm.end()

@command('perfdiffwd', formatteropts)
def perfdiffwd(ui, repo, **opts):
    """Profile diff of working directory changes"""
    timer, fm = gettimer(ui, opts)
    options = {
        'w': 'ignore_all_space',
        'b': 'ignore_space_change',
        'B': 'ignore_blank_lines',
        }

    for diffopt in ('', 'w', 'b', 'B', 'wB'):
        opts = dict((options[c], '1') for c in diffopt)
        def d():
            ui.pushbuffer()
            commands.diff(ui, repo, **opts)
            ui.popbuffer()
        title = 'diffopts: %s' % (diffopt and ('-' + diffopt) or 'none')
        timer(d, title)
    fm.end()

@command('perfrevlog', revlogopts + formatteropts +
         [('d', 'dist', 100, 'distance between the revisions'),
          ('s', 'startrev', 0, 'revision to start reading at')],
         '-c|-m|FILE')
def perfrevlog(ui, repo, file_=None, startrev=0, **opts):
    """Benchmark reading a series of revisions from a revlog.

    By default, we read every ``-d/--dist`` revision from 0 to tip of
    the specified revlog.

    The start revision can be defined via ``-s/--startrev``.
    """
    timer, fm = gettimer(ui, opts)
    dist = opts['dist']
    _len = getlen(ui)
    def d():
        r = cmdutil.openrevlog(repo, 'perfrevlog', file_, opts)
        for x in xrange(startrev, _len(r), dist):
            r.revision(r.node(x))

    timer(d)
    fm.end()

@command('perfrevlogrevision', revlogopts + formatteropts +
         [('', 'cache', False, 'use caches instead of clearing')],
         '-c|-m|FILE REV')
def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
    """Benchmark obtaining a revlog revision.

    Obtaining a revlog revision consists of roughly the following steps:

    1. Compute the delta chain
    2. Obtain the raw chunks for that delta chain
    3. Decompress each raw chunk
    4. Apply binary patches to obtain fulltext
    5. Verify hash of fulltext

    This command measures the time spent in each of these phases.
    """
    if opts.get('changelog') or opts.get('manifest'):
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError('perfrevlogrevision', 'invalid arguments')

    r = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)
    node = r.lookup(rev)
    rev = r.rev(node)

    def dodeltachain(rev):
        if not cache:
            r.clearcaches()
        r._deltachain(rev)

    def doread(chain):
        if not cache:
            r.clearcaches()
        r._chunkraw(chain[0], chain[-1])

    def dodecompress(data, chain):
        if not cache:
            r.clearcaches()

        start = r.start
        length = r.length
        inline = r._inline
        iosize = r._io.size
        buffer = util.buffer
        offset = start(chain[0])

        for rev in chain:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            b = buffer(data, chunkstart - offset, chunklength)
            revlog.decompress(b)

    def dopatch(text, bins):
        if not cache:
            r.clearcaches()
        mdiff.patches(text, bins)

    def dohash(text):
        if not cache:
            r.clearcaches()
        r._checkhash(text, node, rev)

    def dorevision():
        if not cache:
            r.clearcaches()
        r.revision(node)

    chain = r._deltachain(rev)[0]
-    data = r._chunkraw(chain[0], chain[-1])
+    data = r._chunkraw(chain[0], chain[-1])[1]
    bins = r._chunks(chain)
    text = str(bins[0])
    bins = bins[1:]
    text = mdiff.patches(text, bins)

    benches = [
        (lambda: dorevision(), 'full'),
        (lambda: dodeltachain(rev), 'deltachain'),
        (lambda: doread(chain), 'read'),
        (lambda: dodecompress(data, chain), 'decompress'),
        (lambda: dopatch(text, bins), 'patch'),
        (lambda: dohash(text), 'hash'),
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
    fm.end()

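perf.py is loaded as an extension, so a phase-by-phase benchmark like the one above is typically invoked ad hoc; an assumed example invocation and output shape (illustration only):

    # hg --config extensions.perf=contrib/perf.py perfrevlogrevision -m tip
    # emits a '! <phase>' title line followed by a '! wall ... comb ...'
    # timing line for each of: full, deltachain, read, decompress, patch, hash
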
@command('perfrevset',
         [('C', 'clear', False, 'clear volatile cache between each call.'),
          ('', 'contexts', False, 'obtain changectx for each revision')]
         + formatteropts, "REVSET")
def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
    """benchmark the execution time of a revset

    Use the --clear option if you need to evaluate the impact of building the
    volatile revision set caches on revset execution. The volatile caches hold
    filtered and obsolescence-related data."""
    timer, fm = gettimer(ui, opts)
    def d():
        if clear:
            repo.invalidatevolatilesets()
        if contexts:
            for ctx in repo.set(expr): pass
        else:
            for r in repo.revs(expr): pass
    timer(d)
    fm.end()

@command('perfvolatilesets', formatteropts)
def perfvolatilesets(ui, repo, *names, **opts):
    """benchmark the computation of the various volatile sets

    Volatile sets compute elements related to filtering and obsolescence."""
    timer, fm = gettimer(ui, opts)
    repo = repo.unfiltered()

    def getobs(name):
        def d():
            repo.invalidatevolatilesets()
            obsolete.getrevs(repo, name)
        return d

    allobs = sorted(obsolete.cachefuncs)
    if names:
        allobs = [n for n in allobs if n in names]

    for name in allobs:
        timer(getobs(name), title=name)

    def getfiltered(name):
        def d():
            repo.invalidatevolatilesets()
            repoview.filterrevs(repo, name)
        return d

    allfilter = sorted(repoview.filtertable)
    if names:
        allfilter = [n for n in allfilter if n in names]

    for name in allfilter:
        timer(getfiltered(name), title=name)
    fm.end()

@command('perfbranchmap',
         [('f', 'full', False,
           'Includes build time of subset'),
         ] + formatteropts)
def perfbranchmap(ui, repo, full=False, **opts):
    """benchmark the update of a branchmap

    This benchmarks the full repo.branchmap() call with read and write disabled
    """
    timer, fm = gettimer(ui, opts)
    def getbranchmap(filtername):
        """generate a benchmark function for the filtername"""
        if filtername is None:
            view = repo
        else:
            view = repo.filtered(filtername)
        def d():
            if full:
                view._branchcaches.clear()
            else:
                view._branchcaches.pop(filtername, None)
            view.branchmap()
        return d
    # order filters from smaller subsets to bigger ones
    possiblefilters = set(repoview.filtertable)
    allfilters = []
    while possiblefilters:
        for name in possiblefilters:
            subset = branchmap.subsettable.get(name)
            if subset not in possiblefilters:
                break
        else:
            assert False, 'subset cycle %s!' % possiblefilters
        allfilters.append(name)
        possiblefilters.remove(name)

    # warm the cache
    if not full:
        for name in allfilters:
            repo.filtered(name).branchmap()
    # add unfiltered
    allfilters.append(None)
    oldread = branchmap.read
    oldwrite = branchmap.branchcache.write
    try:
        branchmap.read = lambda repo: None
        branchmap.write = lambda repo: None
        for name in allfilters:
            timer(getbranchmap(name), title=str(name))
    finally:
        branchmap.read = oldread
        branchmap.branchcache.write = oldwrite
    fm.end()

@command('perfloadmarkers')
def perfloadmarkers(ui, repo):
    """benchmark the time to parse the on-disk markers for a repo

    Result is the number of markers in the repo."""
    timer, fm = gettimer(ui)
    timer(lambda: len(obsolete.obsstore(repo.svfs)))
    fm.end()

@command('perflrucachedict', formatteropts +
         [('', 'size', 4, 'size of cache'),
          ('', 'gets', 10000, 'number of key lookups'),
          ('', 'sets', 10000, 'number of key sets'),
          ('', 'mixed', 10000, 'number of mixed mode operations'),
          ('', 'mixedgetfreq', 50, 'frequency of get vs set ops in mixed mode')],
         norepo=True)
def perflrucache(ui, size=4, gets=10000, sets=10000, mixed=10000,
                 mixedgetfreq=50, **opts):
    def doinit():
        for i in xrange(10000):
            util.lrucachedict(size)

    values = []
    for i in xrange(size):
        values.append(random.randint(0, sys.maxint))

    # Get mode fills the cache and tests raw lookup performance with no
    # eviction.
    getseq = []
    for i in xrange(gets):
        getseq.append(random.choice(values))

    def dogets():
        d = util.lrucachedict(size)
        for v in values:
            d[v] = v
        for key in getseq:
            value = d[key]
            value # silence pyflakes warning

    # Set mode tests insertion speed with cache eviction.
    setseq = []
    for i in xrange(sets):
        setseq.append(random.randint(0, sys.maxint))

    def dosets():
        d = util.lrucachedict(size)
        for v in setseq:
            d[v] = v

    # Mixed mode randomly performs gets and sets with eviction.
    mixedops = []
    for i in xrange(mixed):
        r = random.randint(0, 100)
        if r < mixedgetfreq:
            op = 0
        else:
            op = 1

        mixedops.append((op, random.randint(0, size * 2)))

    def domixed():
        d = util.lrucachedict(size)

        for op, v in mixedops:
            if op == 0:
                try:
                    d[v]
                except KeyError:
                    pass
            else:
                d[v] = v

    benches = [
        (doinit, 'init'),
        (dogets, 'gets'),
        (dosets, 'sets'),
        (domixed, 'mixed')
    ]

    for fn, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(fn, title=title)
    fm.end()

mercurial/revlog.py
@@ -1,1786 +1,1790 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

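Both branches of hash() compute the same thing: SHA-1 over the two parent nodes in sorted order followed by the text; the null-parent branch exists only because copying the precomputed nullid digest state is faster than hashing it again. An equivalent one-branch sketch (illustration, not part of revlog.py):

    import hashlib

    def nodehash(text, p1, p2):
        # nullid is 20 zero bytes, so it always sorts first
        lo, hi = sorted([p1, p2])
        return hashlib.sha1(lo + hi + text).digest()
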
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return util.buffer(bin, 1)
    raise RevlogError(_("unknown compression type %r") % t)

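decompress() dispatches on the first byte of each chunk: zlib streams produced by _compress begin with 'x' (0x78 under the default settings), chunks stored uncompressed are written with a leading 'u' marker that is sliced off, and empty or '\0'-prefixed data passes through unchanged. A quick check of the zlib case (illustration):

    import zlib
    assert zlib.compress('some revlog chunk')[0] == 'x'
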
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

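The format string packs exactly the layout described in the comment above, so every v0 index entry is a fixed 76 bytes (illustration):

    import struct
    # 4 big-endian longs (16 bytes) plus three 20-byte nodeids = 76 bytes
    assert struct.calcsize(">4l20s20s20s") == 76
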
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

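A RevlogNG entry is likewise fixed-size: the 6-byte offset and 2-byte flags share one big-endian Q, followed by six 4-byte integers, a 20-byte nodeid, and 12 bytes of padding, for 64 bytes per entry; versionformat is overlaid on the first 4 bytes of entry 0, as revlogio.packentry() below shows (illustration):

    import struct
    assert struct.calcsize(">Qiiiiii20s12x") == 8 + 6 * 4 + 20 + 12  # 64
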
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
172 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
173 # signed integer)
173 # signed integer)
174 _maxentrysize = 0x7fffffff
174 _maxentrysize = 0x7fffffff
175
175
176 class revlogio(object):
176 class revlogio(object):
177 def __init__(self):
177 def __init__(self):
178 self.size = struct.calcsize(indexformatng)
178 self.size = struct.calcsize(indexformatng)
179
179
180 def parseindex(self, data, inline):
180 def parseindex(self, data, inline):
181 # call the C implementation to parse the index data
181 # call the C implementation to parse the index data
182 index, cache = parsers.parse_index2(data, inline)
182 index, cache = parsers.parse_index2(data, inline)
183 return index, getattr(index, 'nodemap', None), cache
183 return index, getattr(index, 'nodemap', None), cache
184
184
185 def packentry(self, entry, node, version, rev):
185 def packentry(self, entry, node, version, rev):
186 p = _pack(indexformatng, *entry)
186 p = _pack(indexformatng, *entry)
187 if rev == 0:
187 if rev == 0:
188 p = _pack(versionformat, version) + p[4:]
188 p = _pack(versionformat, version) + p[4:]
189 return p
189 return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # 2-tuple of (rev, baserev) defining the base revision the delta chain
        # begins at for a revision.
        self._basecache = None
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
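
    # Illustration (assumes a revlog with at least six revisions):
    # revs() iterates in either direction, inclusive of both endpoints
    # when stop is given.
    #
    #   list(rl.revs(2, 5))   # [2, 3, 4, 5]
    #   list(rl.revs(5, 2))   # [5, 4, 3, 2] -- start > stop walks backwards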

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._basecache = None
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
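
    # Illustration (hypothetical chain): for a delta chain 7 -> 4 -> 2
    # where revision 2 is a full snapshot (its base is itself):
    #
    #   rl._deltachain(7)            # ([2, 4, 7], False)
    #   rl._deltachain(7, stoprev=4) # ([7], True) -- stopped before 4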

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
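
    # Worked example (hypothetical graph): for revisions 0 <- 1 <- 2 and
    # 0 <- 3, every rev first marks itself as a candidate head, then
    # clears its parents' slots; the extra slot at the end absorbs
    # writes for nullrev (-1):
    #
    #   ishead begins [1, 1, 1, 1, 0] and ends [0, 0, 1, 1, 0]
    #   -> headrevs are [2, 3]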

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
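
    # Note (illustrative): when several equally good common ancestors
    # exist, min() over the candidate nodeids picks a stable winner, so
    # repeated merges resolve to the same ancestor:
    #
    #   anc = rl.ancestor(node_a, node_b) # a nodeid, or nullid if the
    #                                     # two revisions are unrelated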

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))
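
    # Illustration (hypothetical identifiers): lookup() accepts several
    # spellings for the same revision, trying _match() first and then
    # _partialmatch():
    #
    #   rl.lookup(5)              # integer revision number
    #   rl.lookup('5')            # str(revision number)
    #   rl.lookup('-1')           # negative revs count back from tip
    #   rl.lookup(binnode)        # 20-byte binary nodeid
    #   rl.lookup('1c6e95f9c5f6') # unambiguous hex nodeid prefix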

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d
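
    # Worked example (hypothetical numbers): with the default 65536-byte
    # cache size (a power of two), a request for 100 bytes at offset
    # 70000 is widened to aligned boundaries:
    #
    #   realoffset = 70000 & ~65535                      # 65536
    #   reallength = ((70100 + 65536) & ~65535) - 65536  # 65536
    #
    # so one aligned 64k window is read and cached, and a
    # util.buffer(d, 70000 - 65536, 100) view is returned.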

    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

-        Returns a str or a buffer instance of raw byte data. Callers will
-        need to call ``self.start(rev)`` and ``self.length()`` to determine
-        where each revision's data begins and ends.
+        Returns a 2-tuple of (offset, data) for the requested range of
+        revisions. Offset is the integer offset from the beginning of the
+        revlog and data is a str or buffer of the raw byte data.
+
+        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
+        to determine where each revision's data begins and ends.
        """
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start
-        return self._getchunk(start, length, df=df)
+
+        return start, self._getchunk(start, length, df=df)

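    # Sketch of the calling convention introduced by this change
    # (variable names hypothetical): _chunkraw() now returns the
    # absolute offset alongside the data, so callers can slice out
    # individual revisions without recomputing the segment start:
    #
    #   offset, data = self._chunkraw(startrev, endrev)
    #   chunkstart = self.start(rev)
    #   if self._inline:
    #       chunkstart += (rev + 1) * self._io.size
    #   chunk = util.buffer(data, chunkstart - offset, self.length(rev))
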
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
-        return decompress(self._chunkraw(rev, rev, df=df))
+        return decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # preload the cache
        try:
            while True:
                # ensure that the cache doesn't change out from under us
                _cache = self._chunkcache
-                self._chunkraw(revs[0], revs[-1], df=df)
+                self._chunkraw(revs[0], revs[-1], df=df)[1]
                if _cache == self._chunkcache:
                    break
            offset, data = _cache
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decompress(buffer(data, chunkstart - offset, chunklength)))

        return l
1143
1147
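    # Illustrative note (a sketch, not upstream code): for ranges that fit
    # in the chunk cache, _chunks() is behaviorally equivalent to calling
    # _chunk() per revision, but it issues one contiguous read covering
    # revs[0] through revs[-1] and slices each chunk out of the cached
    # buffer:
    #
    #   assert rl._chunks([0, 1, 2]) == [rl._chunk(r) for r in (0, 1, 2)]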
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

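    # Illustrative example (hypothetical revlog contents): without
    # generaldelta, deltas always chain against the previous revision, so
    # for any non-base revision ``rl.deltaparent(rev) == rev - 1``. With
    # generaldelta, the base stored in index field 3 is honored, allowing
    # e.g.:
    #
    #   rl.deltaparent(5) == 2    # rev 5 deltas directly against rev 2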
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return str(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1),
                              self.revision(rev2))

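    # Illustrative property (a sketch): when rev2's stored delta is already
    # based on rev1, the decompressed chunk *is* the requested delta and no
    # fulltext reconstruction is needed; either way the result satisfies:
    #
    #   delta = rl.revdiff(r1, r2)
    #   assert mdiff.patch(rl.revision(r1), delta) == rl.revision(r2)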
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df is an existing file handle to read from. It is meant to only be
        used internally.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        if rev is None:
            rev = self.rev(node)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        bins = self._chunks(chain, df=_df)
        if text is None:
            text = str(bins[0])
            bins = bins[1:]

        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

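    # Illustrative sketch of the reconstruction above (names as in this
    # file): a fulltext is rebuilt by folding each delta in the chain into
    # the base text:
    #
    #   chain, stopped = rl._deltachain(rev)     # e.g. [base, ..., rev]
    #   bins = rl._chunks(chain)                 # decompressed chunks
    #   text = mdiff.patches(str(bins[0]), bins[1:])
    #
    # When the chain walk stops at the cached revision, the cached fulltext
    # replaces bins[0] as the starting text.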
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

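    # Illustrative re-statement (an assumption about the module-level
    # hash() called above, not a verbatim copy): the node is a SHA-1 over
    # the two parent nodes in sorted order followed by the text:
    #
    #   import hashlib
    #   def nodehash(text, p1, p2):    # hypothetical helper
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()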
    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        self.checkhash(text, p1, p2, node, rev)
        return text

    def checkhash(self, text, p1, p2, node, rev=None):
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

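    # Illustrative layout (hypothetical file names): an inline revlog
    # interleaves index entries and chunk data in the .i file; conversion
    # rewrites it as a bare index plus a separate .d data file:
    #
    #   before:  foo.i = [entry0][data0][entry1][data1]...
    #   after:   foo.i = [entry0][entry1]...    foo.d = [data0][data1]...
    #
    # which is why inline offsets elsewhere are adjusted by
    # (rev + 1) * self._io.size.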
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if len(text) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(text)))

        node = node or self.hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, text):
        """generate a possibly-compressed representation of text"""
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)

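    # Illustrative summary of the return convention (a sketch, assuming the
    # companion decompress() dispatches on the first byte): compress()
    # returns a (header, data) pair:
    #
    #   ("", bin)   -> zlib-compressed chunk
    #   ("", text)  -> raw text that already starts with '\0'
    #   ('u', text) -> raw text prefixed with 'u' so it cannot be mistaken
    #                  for zlib data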
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we
        know to be performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

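    # Illustrative numbers (made up): for a 1000-byte fulltext, a candidate
    # delta is rejected when the read span from its chain base exceeds
    # 4000 bytes (dist > textlen * 4), the delta itself exceeds 1000 bytes
    # (l > textlen), or the chain's accumulated compressed deltas exceed
    # 2000 bytes -- the first bound limits I/O, the others bound CPU.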
    def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(self.node(baserev), _df=fh)
                btext[0] = mdiff.patch(basetext, delta)
            try:
                self.checkhash(btext[0], p1, p2, node)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(self.node(rev), _df=fh)
                    delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += l
            return dist, l, data, base, chainbase, chainlen, compresseddeltalen

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        offset = self.end(prev)
        delta = None
        if self._basecache is None:
            self._basecache = (prev, self.chainbase(prev))
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)

        # should we try to build a delta?
        if prev != nullrev:
            tested = set()
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache.
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude any base already tested lazily above
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and text is None:
            text = buildtext()

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

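    # Illustrative summary of the selection order above (a sketch, not
    # upstream prose): candidates are tried in order and the first one that
    # passes _isgooddelta() wins; a fulltext is stored only if all fail:
    #
    #   1. the delta received over the wire (if lazydeltabase)
    #   2. delta(s) against the parent revision(s) (generaldelta only)
    #   3. a delta against the previous revision
    #   4. otherwise, the full compressed text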
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = cg.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

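    # Illustrative shape (hypothetical values) of the dict yielded by
    # cg.deltachunk() for each incoming revision, as unpacked above:
    #
    #   {'node': node, 'p1': p1, 'p2': p2,   # 20-byte binary nodeids
    #    'cs': cs,                           # changeset this rev links to
    #    'deltabase': deltabase,             # node the delta applies against
    #    'delta': delta,                     # the binary delta itself
    #    'flags': flags}                     # revision index flags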
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

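    # Illustrative worked example (made-up linkrevs): take revs 0..2 where
    # revs 1 and 2 are both heads on top of rev 0 and linkrevs are
    # [0, 2, 1]. getstrippoint(2) must strip rev 1 (linkrev 2 >= 2); since
    # truncation is by rev number, rev 2 is removed as well even though its
    # linkrev 1 < 2, so it is reported as broken:
    #
    #   rl.getstrippoint(2) == (1, set([2]))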
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

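    # Illustrative meaning of the return value (a sketch): checksize()
    # returns (dd, di), the byte-count discrepancies of the data file and
    # index file against what the index entries predict; (0, 0) means both
    # files end exactly where they should. A truncated write that leaves a
    # partial index entry behind would surface as di != 0.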
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res