py3: use node.hex(m.digest()) instead of m.hexdigest()...
Pulkit Goyal
r40711:9fcf8084 default
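
Why this change: on Python 3, hashlib's hexdigest() returns a unicode str, while Mercurial works with bytes throughout. node.hex() (a thin wrapper around binascii.hexlify) applied to the raw digest() yields the same hexadecimal text as bytes on both Python 2 and 3. A minimal standalone sketch of the difference, using binascii.hexlify in place of mercurial.node.hex:

    import binascii
    import hashlib

    m = hashlib.sha1(b'some data')

    text = m.hexdigest()    # str on Python 3; bytes-like str on Python 2
    raw = m.digest()        # bytes on both versions

    # node.hex is essentially binascii.hexlify, so it stays in bytes land:
    hexbytes = binascii.hexlify(raw)  # bytes on both versions

    try:
        b'i' + text         # fine on Python 2, TypeError on Python 3
    except TypeError:
        print('mixing a str hexdigest with bytes breaks on Python 3')

    print(b'i' + hexbytes)  # works on both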
@@ -1,829 +1,829 @@
# Copyright 2016-present Facebook. All Rights Reserved.
#
# context: context needed to annotate a file
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import contextlib
import hashlib
import os

from mercurial.i18n import _
from mercurial import (
    error,
    linelog as linelogmod,
    lock as lockmod,
    mdiff,
    node,
    pycompat,
    scmutil,
    util,
)
from mercurial.utils import (
    stringutil,
)

from . import (
    error as faerror,
    revmap as revmapmod,
)

# given path, get filelog, cached
@util.lrucachefunc
def _getflog(repo, path):
    return repo.file(path)

# extracted from mercurial.context.basefilectx.annotate
def _parents(f, follow=True):
    # Cut _descendantrev here to mitigate the penalty of lazy linkrev
    # adjustment. Otherwise, p._adjustlinkrev() would walk changelog
    # from the topmost introrev (= srcrev) down to p.linkrev() if it
    # isn't an ancestor of the srcrev.
    f._changeid
    pl = f.parents()

    # Don't return renamed parents if we aren't following.
    if not follow:
        pl = [p for p in pl if p.path() == f.path()]

    # renamed filectx won't have a filelog yet, so set it
    # from the cache to save time
    for p in pl:
        if not '_filelog' in p.__dict__:
            p._filelog = _getflog(f._repo, p.path())

    return pl

# extracted from mercurial.context.basefilectx.annotate. slightly modified
# so it takes a fctx instead of a pair of text and fctx.
def _decorate(fctx):
    text = fctx.data()
    linecount = text.count('\n')
    if text and not text.endswith('\n'):
        linecount += 1
    return ([(fctx, i) for i in pycompat.xrange(linecount)], text)

# extracted from mercurial.context.basefilectx.annotate. slightly modified
# so it takes an extra "blocks" parameter calculated elsewhere, instead of
# calculating diff here.
def _pair(parent, child, blocks):
    for (a1, a2, b1, b2), t in blocks:
        # Changed blocks ('!') or blocks made only of blank lines ('~')
        # belong to the child.
        if t == '=':
            child[0][b1:b2] = parent[0][a1:a2]
    return child

# like scmutil.revsingle, but with lru cache, so their states (like manifests)
# could be reused
_revsingle = util.lrucachefunc(scmutil.revsingle)

def resolvefctx(repo, rev, path, resolverev=False, adjustctx=None):
    """(repo, str, str) -> fctx

    get the filectx object from repo, rev, path, in an efficient way.

    if resolverev is True, "rev" is a revision specified by the revset
    language, otherwise "rev" is a nodeid, or a revision number that can
    be consumed by repo.__getitem__.

    if adjustctx is not None, the returned fctx will point to a changeset
    that introduces the change (last modified the file). if adjustctx
    is 'linkrev', trust the linkrev and do not adjust it. this is noticeably
    faster for big repos but is incorrect for some cases.
    """
    if resolverev and not isinstance(rev, int) and rev is not None:
        ctx = _revsingle(repo, rev)
    else:
        ctx = repo[rev]

    # If we don't need to adjust the linkrev, create the filectx using the
    # changectx instead of using ctx[path]. This means it already has the
    # changectx information, so blame -u will be able to look directly at the
    # commitctx object instead of having to resolve it by going through the
    # manifest. In a lazy-manifest world this can prevent us from downloading a
    # lot of data.
    if adjustctx is None:
        # ctx.rev() is None means it's the working copy, which is a special
        # case.
        if ctx.rev() is None:
            fctx = ctx[path]
        else:
            fctx = repo.filectx(path, changeid=ctx.rev())
    else:
        fctx = ctx[path]
        if adjustctx == 'linkrev':
            introrev = fctx.linkrev()
        else:
            introrev = fctx.introrev()
        if introrev != ctx.rev():
            fctx._changeid = introrev
            fctx._changectx = repo[introrev]
    return fctx

# like mercurial.store.encodedir, but use linelog suffixes: .m, .l, .lock
def encodedir(path):
    return (path
            .replace('.hg/', '.hg.hg/')
            .replace('.l/', '.l.hg/')
            .replace('.m/', '.m.hg/')
            .replace('.lock/', '.lock.hg/'))

def hashdiffopts(diffopts):
    diffoptstr = stringutil.pprint(sorted(
        (k, getattr(diffopts, k))
        for k in mdiff.diffopts.defaults
    ))
-    return hashlib.sha1(diffoptstr).hexdigest()[:6]
+    return node.hex(hashlib.sha1(diffoptstr).digest())[:6]

_defaultdiffopthash = hashdiffopts(mdiff.defaultopts)

class annotateopts(object):
    """like mercurial.mdiff.diffopts, but is for annotate

    followrename: follow renames, like "hg annotate -f"
    followmerge: follow p2 of a merge changeset, otherwise p2 is ignored
    """

    defaults = {
        'diffopts': None,
        'followrename': True,
        'followmerge': True,
    }

    def __init__(self, **opts):
        opts = pycompat.byteskwargs(opts)
        for k, v in self.defaults.iteritems():
            setattr(self, k, opts.get(k, v))

    @util.propertycache
    def shortstr(self):
        """represent opts in a short string, suitable for a directory name"""
        result = ''
        if not self.followrename:
            result += 'r0'
        if not self.followmerge:
            result += 'm0'
        if self.diffopts is not None:
            assert isinstance(self.diffopts, mdiff.diffopts)
            diffopthash = hashdiffopts(self.diffopts)
            if diffopthash != _defaultdiffopthash:
                result += 'i' + diffopthash
        return result or 'default'

defaultopts = annotateopts()

class _annotatecontext(object):
    """do not use this class directly as it does not use lock to protect
    writes. use "with annotatecontext(...)" instead.
    """

    def __init__(self, repo, path, linelogpath, revmappath, opts):
        self.repo = repo
        self.ui = repo.ui
        self.path = path
        self.opts = opts
        self.linelogpath = linelogpath
        self.revmappath = revmappath
        self._linelog = None
        self._revmap = None
        self._node2path = {} # {str: str}

    @property
    def linelog(self):
        if self._linelog is None:
            if os.path.exists(self.linelogpath):
                with open(self.linelogpath, 'rb') as f:
                    try:
                        self._linelog = linelogmod.linelog.fromdata(f.read())
                    except linelogmod.LineLogError:
                        self._linelog = linelogmod.linelog()
            else:
                self._linelog = linelogmod.linelog()
        return self._linelog

    @property
    def revmap(self):
        if self._revmap is None:
            self._revmap = revmapmod.revmap(self.revmappath)
        return self._revmap

    def close(self):
        if self._revmap is not None:
            self._revmap.flush()
            self._revmap = None
        if self._linelog is not None:
            with open(self.linelogpath, 'wb') as f:
                f.write(self._linelog.encode())
            self._linelog = None

    __del__ = close

    def rebuild(self):
        """delete linelog and revmap, useful for rebuilding"""
        self.close()
        self._node2path.clear()
        _unlinkpaths([self.revmappath, self.linelogpath])

    @property
    def lastnode(self):
        """return last node in revmap, or None if revmap is empty"""
        if self._revmap is None:
            # fast path, read revmap without loading its full content
            return revmapmod.getlastnode(self.revmappath)
        else:
            return self._revmap.rev2hsh(self._revmap.maxrev)

    def isuptodate(self, master, strict=True):
        """return True if the revmap / linelog is up-to-date, or the file
        does not exist in the master revision. False otherwise.

        it tries to be fast and could return false negatives, because of the
        use of linkrev instead of introrev.

        useful for both server and client to decide whether to update
        fastannotate cache or not.

        if strict is True, even if fctx exists in the revmap, but is not the
        last node, isuptodate will return False. it's good for performance - no
        expensive check was done.

        if strict is False, if fctx exists in the revmap, this function may
        return True. this is useful for the client to skip downloading the
        cache if the client's master is behind the server's.
        """
        lastnode = self.lastnode
        try:
            f = self._resolvefctx(master, resolverev=True)
            # choose linkrev instead of introrev as the check is meant to be
            # *fast*.
            linknode = self.repo.changelog.node(f.linkrev())
            if not strict and lastnode and linknode != lastnode:
                # check if f.node() is in the revmap. note: this loads the
                # revmap and can be slow.
                return self.revmap.hsh2rev(linknode) is not None
            # avoid resolving old manifest, or slow adjustlinkrev to be fast,
            # false negatives are acceptable in this case.
            return linknode == lastnode
        except LookupError:
            # master does not have the file, or the revmap is ahead
            return True

    def annotate(self, rev, master=None, showpath=False, showlines=False):
        """incrementally update the cache so it includes revisions in the main
        branch till 'master'. and run annotate on 'rev', which may or may not be
        included in the main branch.

        if master is None, do not update linelog.

        the first value returned is the annotate result, it is [(node, linenum)]
        by default. [(node, linenum, path)] if showpath is True.

        if showlines is True, a second value will be returned, it is a list of
        corresponding line contents.
        """

        # the fast path test requires commit hash, convert rev number to hash,
        # so it may hit the fast path. note: in the "fctx" mode, the "annotate"
        # command could give us a revision number even if the user passes a
        # commit hash.
        if isinstance(rev, int):
            rev = node.hex(self.repo.changelog.node(rev))

        # fast path: if rev is in the main branch already
        directly, revfctx = self.canannotatedirectly(rev)
        if directly:
            if self.ui.debugflag:
                self.ui.debug('fastannotate: %s: using fast path '
                              '(resolved fctx: %s)\n'
                              % (self.path,
                                 stringutil.pprint(util.safehasattr(revfctx,
                                                                    'node'))))
            return self.annotatedirectly(revfctx, showpath, showlines)

        # resolve master
        masterfctx = None
        if master:
            try:
                masterfctx = self._resolvefctx(master, resolverev=True,
                                               adjustctx=True)
            except LookupError: # master does not have the file
                pass
            else:
                if masterfctx in self.revmap: # no need to update linelog
                    masterfctx = None

        #  ... - @ <- rev (can be an arbitrary changeset,
        #       /         not necessarily a descendant
        # master -> o     of master)
        #           |
        # a merge -> o    'o': new changesets in the main branch
        #            |\   '#': revisions in the main branch that
        #            o *       exist in linelog / revmap
        #            | .  '*': changesets in side branches, or
        # last master -> # .   descendants of master
        #                | .
        #                # *   joint: '#', and is a parent of a '*'
        #                |/
        #     a joint -> # ^^^^ --- side branches
        #                |
        #                ^ --- main branch (in linelog)

        # these DFSes are similar to the traditional annotate algorithm.
        # we cannot really reuse the code for perf reason.

        # 1st DFS calculates merges, joint points, and needed.
        # "needed" is a simple reference counting dict to free items in
        # "hist", reducing its memory usage otherwise could be huge.
        initvisit = [revfctx]
        if masterfctx:
            if masterfctx.rev() is None:
                raise error.Abort(_('cannot update linelog to wdir()'),
                                  hint=_('set fastannotate.mainbranch'))
            initvisit.append(masterfctx)
        visit = initvisit[:]
        pcache = {}
        needed = {revfctx: 1}
        hist = {} # {fctx: ([(llrev or fctx, linenum)], text)}
        while visit:
            f = visit.pop()
            if f in pcache or f in hist:
                continue
            if f in self.revmap: # in the old main branch, it's a joint
                llrev = self.revmap.hsh2rev(f.node())
                self.linelog.annotate(llrev)
                result = self.linelog.annotateresult
                hist[f] = (result, f.data())
                continue
            pl = self._parentfunc(f)
            pcache[f] = pl
            for p in pl:
                needed[p] = needed.get(p, 0) + 1
                if p not in pcache:
                    visit.append(p)

        # 2nd (simple) DFS calculates new changesets in the main branch
        # ('o' nodes in # the above graph), so we know when to update linelog.
        newmainbranch = set()
        f = masterfctx
        while f and f not in self.revmap:
            newmainbranch.add(f)
            pl = pcache[f]
            if pl:
                f = pl[0]
            else:
                f = None
                break

        # f, if present, is the position where the last build stopped at, and
        # should be the "master" last time. check to see if we can continue
        # building the linelog incrementally. (we cannot if diverged)
        if masterfctx is not None:
            self._checklastmasterhead(f)

        if self.ui.debugflag:
            if newmainbranch:
                self.ui.debug('fastannotate: %s: %d new changesets in the main'
                              ' branch\n' % (self.path, len(newmainbranch)))
            elif not hist: # no joints, no updates
                self.ui.debug('fastannotate: %s: linelog cannot help in '
                              'annotating this revision\n' % self.path)

        # prepare annotateresult so we can update linelog incrementally
        self.linelog.annotate(self.linelog.maxrev)

        # 3rd DFS does the actual annotate
        visit = initvisit[:]
        progress = 0
        while visit:
            f = visit[-1]
            if f in hist:
                visit.pop()
                continue

            ready = True
            pl = pcache[f]
            for p in pl:
                if p not in hist:
                    ready = False
                    visit.append(p)
            if not ready:
                continue

            visit.pop()
            blocks = None # mdiff blocks, used for appending linelog
            ismainbranch = (f in newmainbranch)
            # curr is the same as the traditional annotate algorithm,
            # if we only care about linear history (do not follow merge),
            # then curr is not actually used.
            assert f not in hist
            curr = _decorate(f)
            for i, p in enumerate(pl):
                bs = list(self._diffblocks(hist[p][1], curr[1]))
                if i == 0 and ismainbranch:
                    blocks = bs
                curr = _pair(hist[p], curr, bs)
                if needed[p] == 1:
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

            if ismainbranch: # need to write to linelog
                if not self.ui.quiet:
                    progress += 1
                    self.ui.progress(_('building cache'), progress,
                                     total=len(newmainbranch))
                bannotated = None
                if len(pl) == 2 and self.opts.followmerge: # merge
                    bannotated = curr[0]
                if blocks is None: # no parents, add an empty one
                    blocks = list(self._diffblocks('', curr[1]))
                self._appendrev(f, blocks, bannotated)
            elif showpath: # not append linelog, but we need to record path
                self._node2path[f.node()] = f.path()

        if progress: # clean progress bar
            self.ui.write()

        result = [
            ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
            for fr, l in hist[revfctx][0]] # [(node, linenumber)]
        return self._refineannotateresult(result, revfctx, showpath, showlines)

    def canannotatedirectly(self, rev):
        """(str) -> bool, fctx or node.
        return (True, f) if we can annotate without updating the linelog, pass
        f to annotatedirectly.
        return (False, f) if we need extra calculation. f is the fctx resolved
        from rev.
        """
        result = True
        f = None
        if not isinstance(rev, int) and rev is not None:
            hsh = {20: bytes, 40: node.bin}.get(len(rev), lambda x: None)(rev)
            if hsh is not None and (hsh, self.path) in self.revmap:
                f = hsh
        if f is None:
            adjustctx = 'linkrev' if self._perfhack else True
            f = self._resolvefctx(rev, adjustctx=adjustctx, resolverev=True)
            result = f in self.revmap
            if not result and self._perfhack:
                # redo the resolution without perfhack - as we are going to
                # do write operations, we need a correct fctx.
                f = self._resolvefctx(rev, adjustctx=True, resolverev=True)
        return result, f

    def annotatealllines(self, rev, showpath=False, showlines=False):
        """(rev : str) -> [(node : str, linenum : int, path : str)]

        the result has the same format with annotate, but include all (including
        deleted) lines up to rev. call this after calling annotate(rev, ...) for
        better performance and accuracy.
        """
        revfctx = self._resolvefctx(rev, resolverev=True, adjustctx=True)

        # find a chain from rev to anything in the mainbranch
        if revfctx not in self.revmap:
            chain = [revfctx]
            a = ''
            while True:
                f = chain[-1]
                pl = self._parentfunc(f)
                if not pl:
                    break
                if pl[0] in self.revmap:
                    a = pl[0].data()
                    break
                chain.append(pl[0])

            # both self.linelog and self.revmap is backed by filesystem. now
            # we want to modify them but do not want to write changes back to
            # files. so we create in-memory objects and copy them. it's like
            # a "fork".
            linelog = linelogmod.linelog()
            linelog.copyfrom(self.linelog)
            linelog.annotate(linelog.maxrev)
            revmap = revmapmod.revmap()
            revmap.copyfrom(self.revmap)

            for f in reversed(chain):
                b = f.data()
                blocks = list(self._diffblocks(a, b))
                self._doappendrev(linelog, revmap, f, blocks)
                a = b
        else:
            # fastpath: use existing linelog, revmap as we don't write to them
            linelog = self.linelog
            revmap = self.revmap

        lines = linelog.getalllines()
        hsh = revfctx.node()
        llrev = revmap.hsh2rev(hsh)
        result = [(revmap.rev2hsh(r), l) for r, l in lines if r <= llrev]
        # cannot use _refineannotateresult since we need custom logic for
        # resolving line contents
        if showpath:
            result = self._addpathtoresult(result, revmap)
        if showlines:
            linecontents = self._resolvelines(result, revmap, linelog)
            result = (result, linecontents)
        return result

    def _resolvelines(self, annotateresult, revmap, linelog):
        """(annotateresult) -> [line]. designed for annotatealllines.
        this is probably the most inefficient code in the whole fastannotate
        directory. but we have made a decision that the linelog does not
        store line contents. so getting them requires random accesses to
        the revlog data, since they can be many, it can be very slow.
        """
        # [llrev]
        revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
        result = [None] * len(annotateresult)
        # {(rev, linenum): [lineindex]}
        key2idxs = collections.defaultdict(list)
        for i in pycompat.xrange(len(result)):
            key2idxs[(revs[i], annotateresult[i][1])].append(i)
        while key2idxs:
            # find an unresolved line and its linelog rev to annotate
            hsh = None
            try:
                for (rev, _linenum), idxs in key2idxs.iteritems():
                    if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
                        continue
                    hsh = annotateresult[idxs[0]][0]
                    break
            except StopIteration: # no more unresolved lines
                return result
            if hsh is None:
                # the remaining key2idxs are not in main branch, resolving them
                # using the hard way...
                revlines = {}
                for (rev, linenum), idxs in key2idxs.iteritems():
                    if rev not in revlines:
                        hsh = annotateresult[idxs[0]][0]
                        if self.ui.debugflag:
                            self.ui.debug('fastannotate: reading %s line #%d '
                                          'to resolve lines %r\n'
                                          % (node.short(hsh), linenum, idxs))
                        fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
                        lines = mdiff.splitnewlines(fctx.data())
                        revlines[rev] = lines
                    for idx in idxs:
                        result[idx] = revlines[rev][linenum]
                assert all(x is not None for x in result)
                return result

            # run the annotate and the lines should match to the file content
            self.ui.debug('fastannotate: annotate %s to resolve lines\n'
                          % node.short(hsh))
            linelog.annotate(rev)
            fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
            annotated = linelog.annotateresult
            lines = mdiff.splitnewlines(fctx.data())
            if len(lines) != len(annotated):
                raise faerror.CorruptedFileError('unexpected annotated lines')
            # resolve lines from the annotate result
            for i, line in enumerate(lines):
                k = annotated[i]
                if k in key2idxs:
                    for idx in key2idxs[k]:
                        result[idx] = line
                    del key2idxs[k]
        return result

    def annotatedirectly(self, f, showpath, showlines):
        """like annotate, but when we know that f is in linelog.
        f can be either a 20-char str (node) or a fctx. this is for perf - in
        the best case, the user provides a node and we don't need to read the
        filelog or construct any filecontext.
        """
        if isinstance(f, str):
            hsh = f
        else:
            hsh = f.node()
        llrev = self.revmap.hsh2rev(hsh)
        if not llrev:
            raise faerror.CorruptedFileError('%s is not in revmap'
                                             % node.hex(hsh))
        if (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) != 0:
            raise faerror.CorruptedFileError('%s is not in revmap mainbranch'
                                             % node.hex(hsh))
        self.linelog.annotate(llrev)
        result = [(self.revmap.rev2hsh(r), l)
                  for r, l in self.linelog.annotateresult]
        return self._refineannotateresult(result, f, showpath, showlines)

    def _refineannotateresult(self, result, f, showpath, showlines):
        """add the missing path or line contents, they can be expensive.
        f could be either node or fctx.
        """
        if showpath:
            result = self._addpathtoresult(result)
        if showlines:
            if isinstance(f, str): # f: node or fctx
                llrev = self.revmap.hsh2rev(f)
                fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
            else:
                fctx = f
            lines = mdiff.splitnewlines(fctx.data())
            if len(lines) != len(result): # linelog is probably corrupted
                raise faerror.CorruptedFileError()
            result = (result, lines)
        return result

    def _appendrev(self, fctx, blocks, bannotated=None):
        self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)

    def _diffblocks(self, a, b):
        return mdiff.allblocks(a, b, self.opts.diffopts)

    @staticmethod
    def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
        """append a revision to linelog and revmap"""

        def getllrev(f):
            """(fctx) -> int"""
            # f should not be a linelog revision
            if isinstance(f, int):
                raise error.ProgrammingError('f should not be an int')
            # f is a fctx, allocate linelog rev on demand
            hsh = f.node()
            rev = revmap.hsh2rev(hsh)
            if rev is None:
                rev = revmap.append(hsh, sidebranch=True, path=f.path())
            return rev

        # append sidebranch revisions to revmap
        siderevs = []
        siderevmap = {} # node: int
        if bannotated is not None:
            for (a1, a2, b1, b2), op in blocks:
                if op != '=':
                    # f could be either linelong rev, or fctx.
                    siderevs += [f for f, l in bannotated[b1:b2]
                                 if not isinstance(f, int)]
            siderevs = set(siderevs)
            if fctx in siderevs: # mainnode must be appended seperately
                siderevs.remove(fctx)
            for f in siderevs:
                siderevmap[f] = getllrev(f)

        # the changeset in the main branch, could be a merge
        llrev = revmap.append(fctx.node(), path=fctx.path())
        siderevmap[fctx] = llrev

        for (a1, a2, b1, b2), op in reversed(blocks):
            if op == '=':
                continue
            if bannotated is None:
                linelog.replacelines(llrev, a1, a2, b1, b2)
            else:
                blines = [((r if isinstance(r, int) else siderevmap[r]), l)
                          for r, l in bannotated[b1:b2]]
                linelog.replacelines_vec(llrev, a1, a2, blines)

    def _addpathtoresult(self, annotateresult, revmap=None):
        """(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
        if revmap is None:
            revmap = self.revmap

        def _getpath(nodeid):
            path = self._node2path.get(nodeid)
            if path is None:
                path = revmap.rev2path(revmap.hsh2rev(nodeid))
                self._node2path[nodeid] = path
            return path

        return [(n, l, _getpath(n)) for n, l in annotateresult]

    def _checklastmasterhead(self, fctx):
        """check if fctx is the master's head last time, raise if not"""
        if fctx is None:
            llrev = 0
        else:
            llrev = self.revmap.hsh2rev(fctx.node())
            if not llrev:
                raise faerror.CannotReuseError()
        if self.linelog.maxrev != llrev:
            raise faerror.CannotReuseError()

    @util.propertycache
    def _parentfunc(self):
        """-> (fctx) -> [fctx]"""
        followrename = self.opts.followrename
        followmerge = self.opts.followmerge
        def parents(f):
            pl = _parents(f, follow=followrename)
            if not followmerge:
                pl = pl[:1]
            return pl
        return parents

    @util.propertycache
    def _perfhack(self):
        return self.ui.configbool('fastannotate', 'perfhack')

    def _resolvefctx(self, rev, path=None, **kwds):
        return resolvefctx(self.repo, rev, (path or self.path), **kwds)

def _unlinkpaths(paths):
    """silent, best-effort unlink"""
    for path in paths:
        try:
            util.unlink(path)
        except OSError:
            pass

class pathhelper(object):
    """helper for getting paths for lockfile, linelog and revmap"""

    def __init__(self, repo, path, opts=defaultopts):
        # different options use different directories
        self._vfspath = os.path.join('fastannotate',
                                     opts.shortstr, encodedir(path))
        self._repo = repo

    @property
    def dirname(self):
        return os.path.dirname(self._repo.vfs.join(self._vfspath))

    @property
    def linelogpath(self):
        return self._repo.vfs.join(self._vfspath + '.l')

    def lock(self):
        return lockmod.lock(self._repo.vfs, self._vfspath + '.lock')

    @contextlib.contextmanager
    def _lockflock(self):
        """the same as 'lock' but use flock instead of lockmod.lock, to avoid
        creating temporary symlinks."""
        import fcntl
        lockpath = self.linelogpath
        util.makedirs(os.path.dirname(lockpath))
        lockfd = os.open(lockpath, os.O_RDONLY | os.O_CREAT, 0o664)
        fcntl.flock(lockfd, fcntl.LOCK_EX)
        try:
            yield
        finally:
            fcntl.flock(lockfd, fcntl.LOCK_UN)
            os.close(lockfd)

    @property
    def revmappath(self):
        return self._repo.vfs.join(self._vfspath + '.m')

@contextlib.contextmanager
def annotatecontext(repo, path, opts=defaultopts, rebuild=False):
    """context needed to perform (fast) annotate on a file

    an annotatecontext of a single file consists of two structures: the
    linelog and the revmap. this function takes care of locking. only 1
    process is allowed to write that file's linelog and revmap at a time.

    when something goes wrong, this function will assume the linelog and the
    revmap are in a bad state, and remove them from disk.

    use this function in the following way:

        with annotatecontext(...) as actx:
            actx. ....
    """
    helper = pathhelper(repo, path, opts)
    util.makedirs(helper.dirname)
    revmappath = helper.revmappath
    linelogpath = helper.linelogpath
    actx = None
    try:
        with helper.lock():
            actx = _annotatecontext(repo, path, linelogpath, revmappath, opts)
            if rebuild:
                actx.rebuild()
            yield actx
    except Exception:
        if actx is not None:
            actx.rebuild()
        repo.ui.debug('fastannotate: %s: cache broken and deleted\n' % path)
        raise
    finally:
        if actx is not None:
            actx.close()

def fctxannotatecontext(fctx, follow=True, diffopts=None, rebuild=False):
    """like annotatecontext but get the context from a fctx. convenient when
    used in fctx.annotate
    """
    repo = fctx._repo
    path = fctx._path
    if repo.ui.configbool('fastannotate', 'forcefollow', True):
        follow = True
    aopts = annotateopts(diffopts=diffopts, followrename=follow)
    return annotatecontext(repo, path, aopts, rebuild)
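
Why the return type matters in this file: hashdiffopts() feeds annotateopts.shortstr, which splices the hash into a byte string ('i' + diffopthash; under Mercurial's Python 3 source transformer, plain string literals in this code are bytes), and pathhelper then uses shortstr as part of an on-disk cache directory name. A str hexdigest would raise TypeError at that concatenation on Python 3. A minimal sketch of that flow, with hypothetical values and binascii.hexlify standing in for node.hex:

    import binascii
    import hashlib
    import os

    def hashdiffopts_sketch(diffoptstr):
        # bytes in, bytes out - safe to splice into other byte strings
        return binascii.hexlify(hashlib.sha1(diffoptstr).digest())[:6]

    # hypothetical serialized diffopts; the real input comes from stringutil.pprint
    shortstr = b'i' + hashdiffopts_sketch(b"[('context', 3)]")
    vfspath = os.path.join(b'fastannotate', shortstr, b'some/file.py')
    print(vfspath)  # directory name varies with the diffopts hash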
@@ -1,609 +1,609 @@
# Copyright 2009-2010 Gregory P. Ward
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
# Copyright 2010-2011 Fog Creek Software
# Copyright 2010-2011 Unity Technologies
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''High-level command function for lfconvert, plus the cmdtable.'''
from __future__ import absolute_import

import errno
import hashlib
import os
import shutil

from mercurial.i18n import _

from mercurial import (
    cmdutil,
    context,
    error,
    hg,
    lock,
    match as matchmod,
    node,
    pycompat,
    registrar,
    scmutil,
    util,
)

from ..convert import (
    convcmd,
    filemap,
)

from . import (
    lfutil,
    storefactory
)

release = lock.release

# -- Commands ----------------------------------------------------------

cmdtable = {}
command = registrar.command(cmdtable)

@command('lfconvert',
    [('s', 'size', '',
      _('minimum size (MB) for files to be converted as largefiles'), 'SIZE'),
    ('', 'to-normal', False,
     _('convert from a largefiles repo to a normal repo')),
    ],
    _('hg lfconvert SOURCE DEST [FILE ...]'),
    norepo=True,
    inferrepo=True)
def lfconvert(ui, src, dest, *pats, **opts):
    '''convert a normal repository to a largefiles repository

    Convert repository SOURCE to a new repository DEST, identical to
    SOURCE except that certain files will be converted as largefiles:
    specifically, any file that matches any PATTERN *or* whose size is
    above the minimum size threshold is converted as a largefile. The
    size used to determine whether or not to track a file as a
    largefile is the size of the first version of the file. The
    minimum size can be specified either with --size or in
    configuration as ``largefiles.size``.

    After running this command you will need to make sure that
    largefiles is enabled anywhere you intend to push the new
    repository.

    Use --to-normal to convert largefiles back to normal files; after
    this, the DEST repository can be used without largefiles at all.'''

    opts = pycompat.byteskwargs(opts)
    if opts['to_normal']:
        tolfile = False
    else:
        tolfile = True
        size = lfutil.getminsize(ui, True, opts.get('size'), default=None)

    if not hg.islocal(src):
        raise error.Abort(_('%s is not a local Mercurial repo') % src)
    if not hg.islocal(dest):
        raise error.Abort(_('%s is not a local Mercurial repo') % dest)

    rsrc = hg.repository(ui, src)
    ui.status(_('initializing destination %s\n') % dest)
    rdst = hg.repository(ui, dest, create=True)

    success = False
    dstwlock = dstlock = None
    try:
        # Get a list of all changesets in the source. The easy way to do this
        # is to simply walk the changelog, using changelog.nodesbetween().
        # Take a look at mercurial/revlog.py:639 for more details.
        # Use a generator instead of a list to decrease memory usage
        ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
            rsrc.heads())[0])
        revmap = {node.nullid: node.nullid}
        if tolfile:
            # Lock destination to prevent modification while it is converted to.
            # Don't need to lock src because we are just reading from its
            # history which can't change.
            dstwlock = rdst.wlock()
            dstlock = rdst.lock()

            lfiles = set()
            normalfiles = set()
            if not pats:
                pats = ui.configlist(lfutil.longname, 'patterns')
            if pats:
                matcher = matchmod.match(rsrc.root, '', list(pats))
            else:
                matcher = None

            lfiletohash = {}
            with ui.makeprogress(_('converting revisions'),
                                 unit=_('revisions'),
                                 total=rsrc['tip'].rev()) as progress:
                for ctx in ctxs:
                    progress.update(ctx.rev())
                    _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
                        lfiles, normalfiles, matcher, size, lfiletohash)

            if rdst.wvfs.exists(lfutil.shortname):
                rdst.wvfs.rmtree(lfutil.shortname)

            for f in lfiletohash.keys():
                if rdst.wvfs.isfile(f):
                    rdst.wvfs.unlink(f)
                try:
                    rdst.wvfs.removedirs(rdst.wvfs.dirname(f))
                except OSError:
                    pass

            # If there were any files converted to largefiles, add largefiles
            # to the destination repository's requirements.
            if lfiles:
                rdst.requirements.add('largefiles')
                rdst._writerequirements()
        else:
            class lfsource(filemap.filemap_source):
                def __init__(self, ui, source):
                    super(lfsource, self).__init__(ui, source, None)
                    self.filemapper.rename[lfutil.shortname] = '.'

                def getfile(self, name, rev):
                    realname, realrev = rev
                    f = super(lfsource, self).getfile(name, rev)

                    if (not realname.startswith(lfutil.shortnameslash)
                            or f[0] is None):
                        return f

                    # Substitute in the largefile data for the hash
                    hash = f[0].strip()
                    path = lfutil.findfile(rsrc, hash)

                    if path is None:
                        raise error.Abort(_("missing largefile for '%s' in %s")
                                          % (realname, realrev))
                    return util.readfile(path), f[1]

            class converter(convcmd.converter):
                def __init__(self, ui, source, dest, revmapfile, opts):
                    src = lfsource(ui, source)

                    super(converter, self).__init__(ui, src, dest, revmapfile,
                                                    opts)

            found, missing = downloadlfiles(ui, rsrc)
            if missing != 0:
                raise error.Abort(_("all largefiles must be present locally"))

            orig = convcmd.converter
            convcmd.converter = converter

            try:
                convcmd.convert(ui, src, dest, source_type='hg', dest_type='hg')
            finally:
                convcmd.converter = orig
        success = True
    finally:
        if tolfile:
            rdst.dirstate.clear()
            release(dstlock, dstwlock)
        if not success:
            # we failed, remove the new directory
            shutil.rmtree(rdst.root)

def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
        matcher, size, lfiletohash):
    # Convert src parents to dst parents
    parents = _convertparents(ctx, revmap)

    # Generate list of changed files
    files = _getchangedfiles(ctx, parents)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the largefile-ness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.renamed()
                if renamed is None:
                    # the code below assumes renamed to be a boolean or a list
                    # and won't quite work with the value None
                    renamed = False
                renamedlfile = renamed and renamed[0] in lfiles
                islfile |= renamedlfile
                if 'l' in fctx.flags():
                    if renamedlfile:
                        raise error.Abort(
                            _('renamed/copied largefile %s becomes symlink')
                            % f)
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            fstandin = lfutil.standin(f)
            dstfiles.append(fstandin)
            # largefile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if 'l' in fctx.flags():
                    renamed = fctx.renamed()
                    if renamed and renamed[0] in lfiles:
                        raise error.Abort(_('largefile %s becomes symlink') % f)

                # largefile was modified, update standins
                m = hashlib.sha1('')
                m.update(ctx[f].data())
-               hash = m.hexdigest()
+               hash = node.hex(m.digest())
                if f not in lfiletohash or lfiletohash[f] != hash:
                    rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
                    executable = 'x' in ctx[f].flags()
                    lfutil.writestandin(rdst, fstandin, hash,
                        executable)
                    lfiletohash[f] = hash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        srcfname = lfutil.splitstandin(f)
        if srcfname is not None:
            # if the file isn't in the manifest then it was removed
            # or renamed, return None to indicate this
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                return None
            renamed = fctx.renamed()
            if renamed:
                # standin is always a largefile because largefile-ness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed[0])

            return context.memfilectx(repo, memctx, f,
                                      lfiletohash[srcfname] + '\n',
                                      'l' in fctx.flags(), 'x' in fctx.flags(),
                                      renamed)
        else:
            return _getnormalcontext(repo, ctx, f, revmap)

    # Commit
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)

def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
    mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
                          getfilectx, ctx.user(), ctx.date(), ctx.extra())
    ret = rdst.commitctx(mctx)
    lfutil.copyalltostore(rdst, ret)
    rdst.setparents(ret)
    revmap[ctx.node()] = rdst.changelog.tip()

# Generate list of changed files
def _getchangedfiles(ctx, parents):
    files = set(ctx.files())
    if node.nullid not in parents:
        mc = ctx.manifest()
        mp1 = ctx.parents()[0].manifest()
        mp2 = ctx.parents()[1].manifest()
        files |= (set(mp1) | set(mp2)) - set(mc)
        for f in mc:
            if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
                files.add(f)
    return files

# Convert src parents to dst parents
def _convertparents(ctx, revmap):
    parents = []
    for p in ctx.parents():
        parents.append(revmap[p.node()])
    while len(parents) < 2:
        parents.append(node.nullid)
    return parents

# Get memfilectx for a normal file
def _getnormalcontext(repo, ctx, f, revmap):
    try:
        fctx = ctx.filectx(f)
    except error.LookupError:
        return None
    renamed = fctx.renamed()
    if renamed:
        renamed = renamed[0]

    data = fctx.data()
    if f == '.hgtags':
        data = _converttags (repo.ui, revmap, data)
    return context.memfilectx(repo, ctx, f, data, 'l' in fctx.flags(),
                              'x' in fctx.flags(), renamed)

# Remap tag data using a revision map
def _converttags(ui, revmap, data):
    newdata = []
    for line in data.splitlines():
        try:
            id, name = line.split(' ', 1)
        except ValueError:
            ui.warn(_('skipping incorrectly formatted tag %s\n')
                    % line)
            continue
        try:
            newid = node.bin(id)
        except TypeError:
            ui.warn(_('skipping incorrectly formatted id %s\n')
                    % id)
            continue
        try:
            newdata.append('%s %s\n' % (node.hex(revmap[newid]),
                                        name))
        except KeyError:
            ui.warn(_('no mapping for id %s\n') % id)
            continue
    return ''.join(newdata)

def _islfile(file, ctx, matcher, size):
    '''Return true if file should be considered a largefile, i.e.
    matcher matches it or it is larger than size.'''
    # never store special .hg* files as largefiles
    if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
        return False
    if matcher and matcher(file):
        return True
    try:
        return ctx.filectx(file).size() >= size * 1024 * 1024
    except error.LookupError:
        return False

def uploadlfiles(ui, rsrc, rdst, files):
    '''upload largefiles to the central store'''

    if not files:
        return

    store = storefactory.openstore(rsrc, rdst, put=True)

    at = 0
    ui.debug("sending statlfile command for %d largefiles\n" % len(files))
    retval = store.exists(files)
    files = [h for h in files if not retval[h]]
    ui.debug("%d largefiles need to be uploaded\n" % len(files))

    with ui.makeprogress(_('uploading largefiles'), unit=_('files'),
                         total=len(files)) as progress:
        for hash in files:
            progress.update(at)
            source = lfutil.findfile(rsrc, hash)
            if not source:
                raise error.Abort(_('largefile %s missing from store'
                                    ' (needs to be uploaded)') % hash)
            # XXX check for errors here
            store.put(source, hash)
            at += 1

def verifylfiles(ui, repo, all=False, contents=False):
    '''Verify that every largefile revision in the current changeset
    exists in the central store. With --contents, also verify that
    the contents of each local largefile file revision are correct (SHA-1 hash
    matches the revision ID). With --all, check every changeset in
    this repository.'''
    if all:
        revs = repo.revs('all()')
    else:
        revs = ['.']

    store = storefactory.openstore(repo)
    return store.verify(revs, contents=contents)

def cachelfiles(ui, repo, node, filelist=None):
    '''cachelfiles ensures that all largefiles needed by the specified revision
    are present in the repository's largefile cache.

    returns a tuple (cached, missing). cached is the list of files downloaded
    by this operation; missing is the list of files that were needed but could
    not be found.'''
    lfiles = lfutil.listlfiles(repo, node)
    if filelist:
        lfiles = set(lfiles) & set(filelist)
    toget = []

    ctx = repo[node]
    for lfile in lfiles:
        try:
            expectedhash = lfutil.readasstandin(ctx[lfutil.standin(lfile)])
        except IOError as err:
            if err.errno == errno.ENOENT:
                continue # node must be None and standin wasn't found in wctx
            raise
        if not lfutil.findfile(repo, expectedhash):
            toget.append((lfile, expectedhash))

    if toget:
        store = storefactory.openstore(repo)
        ret = store.get(toget)
        return ret

    return ([], [])

def downloadlfiles(ui, repo, rev=None):
    match = scmutil.match(repo[None], [repo.wjoin(lfutil.shortname)], {})
    def prepare(ctx, fns):
        pass
    totalsuccess = 0
    totalmissing = 0
    if rev != []: # walkchangerevs on empty list would return all revs
        for ctx in cmdutil.walkchangerevs(repo, match, {'rev' : rev},
                                          prepare):
            success, missing = cachelfiles(ui, repo, ctx.node())
            totalsuccess += len(success)
            totalmissing += len(missing)
    ui.status(_("%d additional largefiles cached\n") % totalsuccess)
    if totalmissing > 0:
        ui.status(_("%d largefiles failed to download\n") % totalmissing)
    return totalsuccess, totalmissing

def updatelfiles(ui, repo, filelist=None, printmessage=None,
                 normallookup=False):
    '''Update largefiles according to standins in the working directory

    If ``printmessage`` is other than ``None``, it means "print (or
    ignore, for false) message forcibly".
    '''
    statuswriter = lfutil.getstatuswriter(ui, repo, printmessage)
    with repo.wlock():
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)

        if filelist is not None:
            filelist = set(filelist)
            lfiles = [f for f in lfiles if f in filelist]

        update = {}
        dropped = set()
        updated, removed = 0, 0
        wvfs = repo.wvfs
        wctx = repo[None]
        for lfile in lfiles:
            rellfile = lfile
            rellfileorig = os.path.relpath(
                scmutil.origpath(ui, repo, wvfs.join(rellfile)),
                start=repo.root)
            relstandin = lfutil.standin(lfile)
            relstandinorig = os.path.relpath(
                scmutil.origpath(ui, repo, wvfs.join(relstandin)),
                start=repo.root)
            if wvfs.exists(relstandin):
                if (wvfs.exists(relstandinorig) and
                        wvfs.exists(rellfile)):
                    shutil.copyfile(wvfs.join(rellfile),
                                    wvfs.join(rellfileorig))
                    wvfs.unlinkpath(relstandinorig)
                expecthash = lfutil.readasstandin(wctx[relstandin])
                if expecthash != '':
                    if lfile not in wctx: # not switched to normal file
                        if repo.dirstate[relstandin] != '?':
                            wvfs.unlinkpath(rellfile, ignoremissing=True)
                        else:
                            dropped.add(rellfile)

                    # use normallookup() to allocate an entry in largefiles
                    # dirstate to prevent lfilesrepo.status() from reporting
                    # missing files as removed.
                    lfdirstate.normallookup(lfile)
                    update[lfile] = expecthash
            else:
                # Remove lfiles for which the standin is deleted, unless the
                # lfile is added to the repository again. This happens when a
                # largefile is converted back to a normal file: the standin
                # disappears, but a new (normal) file appears as the lfile.
                if (wvfs.exists(rellfile) and
                        repo.dirstate.normalize(lfile) not in wctx):
                    wvfs.unlinkpath(rellfile)
                    removed += 1

        # largefile processing might be slow and be interrupted - be prepared
        lfdirstate.write()

        if lfiles:
            lfiles = [f for f in lfiles if f not in dropped]

            for f in dropped:
                repo.wvfs.unlinkpath(lfutil.standin(f))

                # This needs to happen for dropped files, otherwise they stay in
                # the M state.
                lfutil.synclfdirstate(repo, lfdirstate, f, normallookup)

            statuswriter(_('getting changed largefiles\n'))
            cachelfiles(ui, repo, None, lfiles)

        for lfile in lfiles:
            update1 = 0

            expecthash = update.get(lfile)
            if expecthash:
                if not lfutil.copyfromcache(repo, expecthash, lfile):
                    # failed ... but already removed and set to normallookup
                    continue
                # Synchronize largefile dirstate to the last modified
                # time of the file
                lfdirstate.normal(lfile)
                update1 = 1

            # copy the exec mode of largefile standin from the repository's
            # dirstate to its state in the lfdirstate.
            rellfile = lfile
            relstandin = lfutil.standin(lfile)
            if wvfs.exists(relstandin):
                # exec is decided by the users permissions using mask 0o100
                standinexec = wvfs.stat(relstandin).st_mode & 0o100
                st = wvfs.stat(rellfile)
                mode = st.st_mode
                if standinexec != mode & 0o100:
                    # first remove all X bits, then shift all R bits to X
                    mode &= ~0o111
                    if standinexec:
                        mode |= (mode >> 2) & 0o111 & ~util.umask
                    wvfs.chmod(rellfile, mode)
                    update1 = 1

            updated += update1

            lfutil.synclfdirstate(repo, lfdirstate, lfile, normallookup)

        lfdirstate.write()
        if lfiles:
            statuswriter(_('%d largefiles updated, %d removed\n') % (updated,
                                                                     removed))

@command('lfpull',
    [('r', 'rev', [], _('pull largefiles for these revisions'))
    ] + cmdutil.remoteopts,
    _('-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'))
def lfpull(ui, repo, source="default", **opts):
    """pull largefiles for the specified revisions from the specified source

    Pull largefiles that are referenced from local changesets but missing
    locally, pulling from a remote repository to the local cache.

    If SOURCE is omitted, the 'default' path will be used.
    See :hg:`help urls` for more information.

    .. container:: verbose

      Some examples:

      - pull largefiles for all branch heads::

          hg lfpull -r "head() and not closed()"

      - pull largefiles on the default branch::

          hg lfpull -r "branch(default)"
    """
    repo.lfpullsource = source

    revs = opts.get(r'rev', [])
    if not revs:
        raise error.Abort(_('no revisions specified'))
    revs = scmutil.revrange(repo, revs)

    numcached = 0
    for rev in revs:
        ui.note(_('pulling largefiles for revision %d\n') % rev)
        (cached, missing) = cachelfiles(ui, repo, rev)
        numcached += len(cached)
    ui.status(_("%d largefiles cached\n") % numcached)

@command('debuglfput',
    [] + cmdutil.remoteopts,
    _('FILE'))
def debuglfput(ui, repo, filepath, **kwargs):
    hash = lfutil.hashfile(filepath)
    storefactory.openstore(repo).put(filepath, hash)
    ui.write('%s\n' % hash)
    return 0
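
The only functional change in the file above is the standin hashing: m.hexdigest() becomes node.hex(m.digest()). The reason is Python 3 compatibility: hexdigest() returns a unicode str on Python 3, while the surrounding code (standin contents, the lfiletohash map) works in bytes. node.hex is an alias for binascii.hexlify, which returns bytes on both Python 2 and 3. A small standalone demonstration of the equivalence:

    import binascii
    import hashlib

    m = hashlib.sha1(b'')
    m.update(b'some largefile payload')

    hexstr = m.hexdigest()                   # py2: str, py3: unicode str
    hexbytes = binascii.hexlify(m.digest())  # bytes on py2 and py3 alike

    # Same 40 hex characters either way; only the py3 type differs.
    assert hexbytes == hexstr.encode('ascii')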
@@ -1,643 +1,644 b''
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
1 # blobstore.py - local and remote (speaking Git-LFS protocol) blob storages
2 #
2 #
3 # Copyright 2017 Facebook, Inc.
3 # Copyright 2017 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import contextlib
10 import contextlib
11 import errno
11 import errno
12 import hashlib
12 import hashlib
13 import json
13 import json
14 import os
14 import os
15 import re
15 import re
16 import socket
16 import socket
17
17
18 from mercurial.i18n import _
18 from mercurial.i18n import _
19
19
20 from mercurial import (
20 from mercurial import (
21 encoding,
21 encoding,
22 error,
22 error,
23 node,
23 pathutil,
24 pathutil,
24 pycompat,
25 pycompat,
25 url as urlmod,
26 url as urlmod,
26 util,
27 util,
27 vfs as vfsmod,
28 vfs as vfsmod,
28 worker,
29 worker,
29 )
30 )
30
31
31 from mercurial.utils import (
32 from mercurial.utils import (
32 stringutil,
33 stringutil,
33 )
34 )
34
35
35 from ..largefiles import lfutil
36 from ..largefiles import lfutil
36
37
37 # 64 bytes for SHA256
38 # 64 bytes for SHA256
38 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39 _lfsre = re.compile(br'\A[a-f0-9]{64}\Z')
39
40
40 class lfsvfs(vfsmod.vfs):
41 class lfsvfs(vfsmod.vfs):
41 def join(self, path):
42 def join(self, path):
42 """split the path at first two characters, like: XX/XXXXX..."""
43 """split the path at first two characters, like: XX/XXXXX..."""
43 if not _lfsre.match(path):
44 if not _lfsre.match(path):
44 raise error.ProgrammingError('unexpected lfs path: %s' % path)
45 raise error.ProgrammingError('unexpected lfs path: %s' % path)
45 return super(lfsvfs, self).join(path[0:2], path[2:])
46 return super(lfsvfs, self).join(path[0:2], path[2:])
46
47
47 def walk(self, path=None, onerror=None):
48 def walk(self, path=None, onerror=None):
48 """Yield (dirpath, [], oids) tuple for blobs under path
49 """Yield (dirpath, [], oids) tuple for blobs under path
49
50
50 Oids only exist in the root of this vfs, so dirpath is always ''.
51 Oids only exist in the root of this vfs, so dirpath is always ''.
51 """
52 """
52 root = os.path.normpath(self.base)
53 root = os.path.normpath(self.base)
53 # when dirpath == root, dirpath[prefixlen:] becomes empty
54 # when dirpath == root, dirpath[prefixlen:] becomes empty
54 # because len(dirpath) < prefixlen.
55 # because len(dirpath) < prefixlen.
55 prefixlen = len(pathutil.normasprefix(root))
56 prefixlen = len(pathutil.normasprefix(root))
56 oids = []
57 oids = []
57
58
58 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
59 for dirpath, dirs, files in os.walk(self.reljoin(self.base, path or ''),
59 onerror=onerror):
60 onerror=onerror):
60 dirpath = dirpath[prefixlen:]
61 dirpath = dirpath[prefixlen:]
61
62
62 # Silently skip unexpected files and directories
63 # Silently skip unexpected files and directories
63 if len(dirpath) == 2:
64 if len(dirpath) == 2:
64 oids.extend([dirpath + f for f in files
65 oids.extend([dirpath + f for f in files
65 if _lfsre.match(dirpath + f)])
66 if _lfsre.match(dirpath + f)])
66
67
67 yield ('', [], oids)
68 yield ('', [], oids)
68
69
69 class nullvfs(lfsvfs):
70 class nullvfs(lfsvfs):
70 def __init__(self):
71 def __init__(self):
71 pass
72 pass
72
73
73 def exists(self, oid):
74 def exists(self, oid):
74 return False
75 return False
75
76
76 def read(self, oid):
77 def read(self, oid):
77 # store.read() calls into here if the blob doesn't exist in its
78 # store.read() calls into here if the blob doesn't exist in its
78 # self.vfs. Raise the same error as a normal vfs when asked to read a
79 # self.vfs. Raise the same error as a normal vfs when asked to read a
79 # file that doesn't exist. The only difference is the full file path
80 # file that doesn't exist. The only difference is the full file path
80 # isn't available in the error.
81 # isn't available in the error.
81 raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)
82 raise IOError(errno.ENOENT, '%s: No such file or directory' % oid)
82
83
83 def walk(self, path=None, onerror=None):
84 def walk(self, path=None, onerror=None):
84 return ('', [], [])
85 return ('', [], [])
85
86
86 def write(self, oid, data):
87 def write(self, oid, data):
87 pass
88 pass
88
89
89 class filewithprogress(object):
90 class filewithprogress(object):
90 """a file-like object that supports __len__ and read.
91 """a file-like object that supports __len__ and read.
91
92
92 Useful to provide progress information for how many bytes are read.
93 Useful to provide progress information for how many bytes are read.
93 """
94 """
94
95
95 def __init__(self, fp, callback):
96 def __init__(self, fp, callback):
96 self._fp = fp
97 self._fp = fp
97 self._callback = callback # func(readsize)
98 self._callback = callback # func(readsize)
98 fp.seek(0, os.SEEK_END)
99 fp.seek(0, os.SEEK_END)
99 self._len = fp.tell()
100 self._len = fp.tell()
100 fp.seek(0)
101 fp.seek(0)
101
102
102 def __len__(self):
103 def __len__(self):
103 return self._len
104 return self._len
104
105
105 def read(self, size):
106 def read(self, size):
106 if self._fp is None:
107 if self._fp is None:
107 return b''
108 return b''
108 data = self._fp.read(size)
109 data = self._fp.read(size)
109 if data:
110 if data:
110 if self._callback:
111 if self._callback:
111 self._callback(len(data))
112 self._callback(len(data))
112 else:
113 else:
113 self._fp.close()
114 self._fp.close()
114 self._fp = None
115 self._fp = None
115 return data
116 return data
116
117
117 class local(object):
118 class local(object):
118 """Local blobstore for large file contents.
119 """Local blobstore for large file contents.
119
120
120 This blobstore is used both as a cache and as a staging area for large blobs
121 This blobstore is used both as a cache and as a staging area for large blobs
121 to be uploaded to the remote blobstore.
122 to be uploaded to the remote blobstore.
122 """
123 """
123
124
124 def __init__(self, repo):
125 def __init__(self, repo):
125 fullpath = repo.svfs.join('lfs/objects')
126 fullpath = repo.svfs.join('lfs/objects')
126 self.vfs = lfsvfs(fullpath)
127 self.vfs = lfsvfs(fullpath)
127
128
128 if repo.ui.configbool('experimental', 'lfs.disableusercache'):
129 if repo.ui.configbool('experimental', 'lfs.disableusercache'):
129 self.cachevfs = nullvfs()
130 self.cachevfs = nullvfs()
130 else:
131 else:
131 usercache = lfutil._usercachedir(repo.ui, 'lfs')
132 usercache = lfutil._usercachedir(repo.ui, 'lfs')
132 self.cachevfs = lfsvfs(usercache)
133 self.cachevfs = lfsvfs(usercache)
133 self.ui = repo.ui
134 self.ui = repo.ui
134
135
135 def open(self, oid):
136 def open(self, oid):
136 """Open a read-only file descriptor to the named blob, in either the
137 """Open a read-only file descriptor to the named blob, in either the
137 usercache or the local store."""
138 usercache or the local store."""
138 # The usercache is the most likely place to hold the file. Commit will
139 # The usercache is the most likely place to hold the file. Commit will
139 # write to both it and the local store, as will anything that downloads
140 # write to both it and the local store, as will anything that downloads
140 # the blobs. However, things like clone without an update won't
141 # the blobs. However, things like clone without an update won't
141 # populate the local store. For an init + push of a local clone,
142 # populate the local store. For an init + push of a local clone,
142 # the usercache is the only place it _could_ be. If not present, the
143 # the usercache is the only place it _could_ be. If not present, the
143 # missing file msg here will indicate the local repo, not the usercache.
144 # missing file msg here will indicate the local repo, not the usercache.
144 if self.cachevfs.exists(oid):
145 if self.cachevfs.exists(oid):
145 return self.cachevfs(oid, 'rb')
146 return self.cachevfs(oid, 'rb')
146
147
147 return self.vfs(oid, 'rb')
148 return self.vfs(oid, 'rb')
148
149
149 def download(self, oid, src):
150 def download(self, oid, src):
150 """Read the blob from the remote source in chunks, verify the content,
151 """Read the blob from the remote source in chunks, verify the content,
151 and write to this local blobstore."""
152 and write to this local blobstore."""
152 sha256 = hashlib.sha256()
153 sha256 = hashlib.sha256()
153
154
154 with self.vfs(oid, 'wb', atomictemp=True) as fp:
155 with self.vfs(oid, 'wb', atomictemp=True) as fp:
155 for chunk in util.filechunkiter(src, size=1048576):
156 for chunk in util.filechunkiter(src, size=1048576):
156 fp.write(chunk)
157 fp.write(chunk)
157 sha256.update(chunk)
158 sha256.update(chunk)
158
159
159 realoid = sha256.hexdigest()
160 realoid = node.hex(sha256.digest())
160 if realoid != oid:
161 if realoid != oid:
161 raise LfsCorruptionError(_('corrupt remote lfs object: %s')
162 raise LfsCorruptionError(_('corrupt remote lfs object: %s')
162 % oid)
163 % oid)
163
164
164 self._linktousercache(oid)
165 self._linktousercache(oid)
165
166
166 def write(self, oid, data):
167 def write(self, oid, data):
167 """Write blob to local blobstore.
168 """Write blob to local blobstore.
168
169
169 This should only be called from the filelog during a commit or similar.
170 This should only be called from the filelog during a commit or similar.
170 As such, there is no need to verify the data. Imports from a remote
171 As such, there is no need to verify the data. Imports from a remote
171 store must use ``download()`` instead."""
172 store must use ``download()`` instead."""
172 with self.vfs(oid, 'wb', atomictemp=True) as fp:
173 with self.vfs(oid, 'wb', atomictemp=True) as fp:
173 fp.write(data)
174 fp.write(data)
174
175
175 self._linktousercache(oid)
176 self._linktousercache(oid)
176
177
177 def linkfromusercache(self, oid):
178 def linkfromusercache(self, oid):
178 """Link blobs found in the user cache into this store.
179 """Link blobs found in the user cache into this store.
179
180
180 The server module needs to do this when it lets the client know not to
181 The server module needs to do this when it lets the client know not to
181 upload the blob, to ensure it is always available in this store.
182 upload the blob, to ensure it is always available in this store.
182 Normally this is done implicitly when the client reads or writes the
183 Normally this is done implicitly when the client reads or writes the
183 blob, but that doesn't happen when the server tells the client that it
184 blob, but that doesn't happen when the server tells the client that it
184 already has the blob.
185 already has the blob.
185 """
186 """
186 if (not isinstance(self.cachevfs, nullvfs)
187 if (not isinstance(self.cachevfs, nullvfs)
187 and not self.vfs.exists(oid)):
188 and not self.vfs.exists(oid)):
188 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
189 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
189 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
190 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
190
191
191 def _linktousercache(self, oid):
192 def _linktousercache(self, oid):
192 # XXX: should we verify the content of the cache, and hardlink back to
193 # XXX: should we verify the content of the cache, and hardlink back to
193 # the local store on success, but truncate, write and link on failure?
194 # the local store on success, but truncate, write and link on failure?
194 if (not self.cachevfs.exists(oid)
195 if (not self.cachevfs.exists(oid)
195 and not isinstance(self.cachevfs, nullvfs)):
196 and not isinstance(self.cachevfs, nullvfs)):
196 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
197 self.ui.note(_('lfs: adding %s to the usercache\n') % oid)
197 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
198 lfutil.link(self.vfs.join(oid), self.cachevfs.join(oid))
198
199
199 def read(self, oid, verify=True):
200 def read(self, oid, verify=True):
200 """Read blob from local blobstore."""
201 """Read blob from local blobstore."""
201 if not self.vfs.exists(oid):
202 if not self.vfs.exists(oid):
202 blob = self._read(self.cachevfs, oid, verify)
203 blob = self._read(self.cachevfs, oid, verify)
203
204
204 # Even if revlog will verify the content, it needs to be verified
205 # Even if revlog will verify the content, it needs to be verified
205 # now before making the hardlink to avoid propagating corrupt blobs.
206 # now before making the hardlink to avoid propagating corrupt blobs.
206 # Don't abort if corruption is detected, because `hg verify` will
207 # Don't abort if corruption is detected, because `hg verify` will
207 # give more useful info about the corruption- simply don't add the
208 # give more useful info about the corruption- simply don't add the
208 # hardlink.
209 # hardlink.
209 if verify or hashlib.sha256(blob).hexdigest() == oid:
210 if verify or node.hex(hashlib.sha256(blob).digest()) == oid:
210 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
211 self.ui.note(_('lfs: found %s in the usercache\n') % oid)
211 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
212 lfutil.link(self.cachevfs.join(oid), self.vfs.join(oid))
212 else:
213 else:
213 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
214 self.ui.note(_('lfs: found %s in the local lfs store\n') % oid)
214 blob = self._read(self.vfs, oid, verify)
215 blob = self._read(self.vfs, oid, verify)
215 return blob
216 return blob
216
217
217 def _read(self, vfs, oid, verify):
218 def _read(self, vfs, oid, verify):
218 """Read blob (after verifying) from the given store"""
219 """Read blob (after verifying) from the given store"""
219 blob = vfs.read(oid)
220 blob = vfs.read(oid)
220 if verify:
221 if verify:
221 _verify(oid, blob)
222 _verify(oid, blob)
222 return blob
223 return blob
223
224
224 def verify(self, oid):
225 def verify(self, oid):
225 """Indicate whether or not the hash of the underlying file matches its
226 """Indicate whether or not the hash of the underlying file matches its
226 name."""
227 name."""
227 sha256 = hashlib.sha256()
228 sha256 = hashlib.sha256()
228
229
229 with self.open(oid) as fp:
230 with self.open(oid) as fp:
230 for chunk in util.filechunkiter(fp, size=1048576):
231 for chunk in util.filechunkiter(fp, size=1048576):
231 sha256.update(chunk)
232 sha256.update(chunk)
232
233
233 return oid == sha256.hexdigest()
234 return oid == node.hex(sha256.digest())
234
235
235 def has(self, oid):
236 def has(self, oid):
236 """Returns True if the local blobstore contains the requested blob,
237 """Returns True if the local blobstore contains the requested blob,
237 False otherwise."""
238 False otherwise."""
238 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
239 return self.cachevfs.exists(oid) or self.vfs.exists(oid)
239
240
240 def _urlerrorreason(urlerror):
241 def _urlerrorreason(urlerror):
241 '''Create a friendly message for the given URLError to be used in an
242 '''Create a friendly message for the given URLError to be used in an
242 LfsRemoteError message.
243 LfsRemoteError message.
243 '''
244 '''
244 inst = urlerror
245 inst = urlerror
245
246
246 if isinstance(urlerror.reason, Exception):
247 if isinstance(urlerror.reason, Exception):
247 inst = urlerror.reason
248 inst = urlerror.reason
248
249
249 if util.safehasattr(inst, 'reason'):
250 if util.safehasattr(inst, 'reason'):
250 try: # usually it is in the form (errno, strerror)
251 try: # usually it is in the form (errno, strerror)
251 reason = inst.reason.args[1]
252 reason = inst.reason.args[1]
252 except (AttributeError, IndexError):
253 except (AttributeError, IndexError):
253 # it might be anything, for example a string
254 # it might be anything, for example a string
254 reason = inst.reason
255 reason = inst.reason
255 if isinstance(reason, pycompat.unicode):
256 if isinstance(reason, pycompat.unicode):
256 # SSLError of Python 2.7.9 contains a unicode
257 # SSLError of Python 2.7.9 contains a unicode
257 reason = encoding.unitolocal(reason)
258 reason = encoding.unitolocal(reason)
258 return reason
259 return reason
259 elif getattr(inst, "strerror", None):
260 elif getattr(inst, "strerror", None):
260 return encoding.strtolocal(inst.strerror)
261 return encoding.strtolocal(inst.strerror)
261 else:
262 else:
262 return stringutil.forcebytestr(urlerror)
263 return stringutil.forcebytestr(urlerror)
263
264
264 class _gitlfsremote(object):
265 class _gitlfsremote(object):
265
266
266 def __init__(self, repo, url):
267 def __init__(self, repo, url):
267 ui = repo.ui
268 ui = repo.ui
268 self.ui = ui
269 self.ui = ui
269 baseurl, authinfo = url.authinfo()
270 baseurl, authinfo = url.authinfo()
270 self.baseurl = baseurl.rstrip('/')
271 self.baseurl = baseurl.rstrip('/')
271 useragent = repo.ui.config('experimental', 'lfs.user-agent')
272 useragent = repo.ui.config('experimental', 'lfs.user-agent')
272 if not useragent:
273 if not useragent:
273 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
274 useragent = 'git-lfs/2.3.4 (Mercurial %s)' % util.version()
274 self.urlopener = urlmod.opener(ui, authinfo, useragent)
275 self.urlopener = urlmod.opener(ui, authinfo, useragent)
275 self.retry = ui.configint('lfs', 'retry')
276 self.retry = ui.configint('lfs', 'retry')
276
277
277 def writebatch(self, pointers, fromstore):
278 def writebatch(self, pointers, fromstore):
278 """Batch upload from local to remote blobstore."""
279 """Batch upload from local to remote blobstore."""
279 self._batch(_deduplicate(pointers), fromstore, 'upload')
280 self._batch(_deduplicate(pointers), fromstore, 'upload')
280
281
281 def readbatch(self, pointers, tostore):
282 def readbatch(self, pointers, tostore):
282 """Batch download from remote to local blostore."""
283 """Batch download from remote to local blostore."""
283 self._batch(_deduplicate(pointers), tostore, 'download')
284 self._batch(_deduplicate(pointers), tostore, 'download')
284
285
    def _batchrequest(self, pointers, action):
        """Get metadata about objects pointed by pointers for given action

        Return decoded JSON object like {'objects': [{'oid': '', 'size': 1}]}
        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/batch.md
        """
        objects = [{'oid': p.oid(), 'size': p.size()} for p in pointers]
        requestdata = json.dumps({
            'objects': objects,
            'operation': action,
        })
        url = '%s/objects/batch' % self.baseurl
        batchreq = util.urlreq.request(url, data=requestdata)
        batchreq.add_header('Accept', 'application/vnd.git-lfs+json')
        batchreq.add_header('Content-Type', 'application/vnd.git-lfs+json')
        try:
            with contextlib.closing(self.urlopener.open(batchreq)) as rsp:
                rawjson = rsp.read()
        except util.urlerr.httperror as ex:
            hints = {
                400: _('check that lfs serving is enabled on %s and "%s" is '
                       'supported') % (self.baseurl, action),
                404: _('the "lfs.url" config may be used to override %s')
                     % self.baseurl,
            }
            hint = hints.get(ex.code, _('api=%s, action=%s') % (url, action))
            raise LfsRemoteError(_('LFS HTTP error: %s') % ex, hint=hint)
        except util.urlerr.urlerror as ex:
            hint = (_('the "lfs.url" config may be used to override %s')
                    % self.baseurl)
            raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
                                 hint=hint)
        try:
            response = json.loads(rawjson)
        except ValueError:
            raise LfsRemoteError(_('LFS server returns invalid JSON: %s')
                                 % rawjson)

        if self.ui.debugflag:
            self.ui.debug('Status: %d\n' % rsp.status)
            # lfs-test-server and hg serve return headers in different order
            self.ui.debug('%s\n'
                          % '\n'.join(sorted(str(rsp.info()).splitlines())))

            if 'objects' in response:
                response['objects'] = sorted(response['objects'],
                                             key=lambda p: p['oid'])
            self.ui.debug('%s\n'
                          % json.dumps(response, indent=2,
                                       separators=('', ': '), sort_keys=True))

        return response

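    # An illustrative batch API exchange (the shape follows the git-lfs batch
    # spec linked above; the oid, size, and href values here are made up):
    #   request:  {"operation": "download",
    #              "objects": [{"oid": "31cf...", "size": 12}]}
    #   response: {"objects": [{"oid": "31cf...", "size": 12,
    #              "actions": {"download": {
    #                  "href": "https://server/objects/31cf...",
    #                  "header": {"Authorization": "Basic ..."}}}}]}
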
    def _checkforservererror(self, pointers, responses, action):
        """Scans errors from objects

        Raises LfsRemoteError if any objects have an error"""
        # Map oid -> pointer once, rather than rebuilding it per response.
        ptrmap = {p.oid(): p for p in pointers}
        for response in responses:
            # The server should return 404 when objects cannot be found. Some
            # server implementations (e.g. lfs-test-server) do not set "error"
            # but just remove "download" from "actions". Treat that case the
            # same as a 404 error.
            if 'error' not in response:
                if (action == 'download'
                    and action not in response.get('actions', [])):
                    code = 404
                else:
                    continue
            else:
                # An error dict without a code doesn't make much sense, so
                # treat it as a server error.
                code = response.get('error').get('code', 500)

            p = ptrmap.get(response['oid'], None)
            if p:
                filename = getattr(p, 'filename', 'unknown')
                errors = {
                    404: 'The object does not exist',
                    410: 'The object was removed by the owner',
                    422: 'Validation error',
                    500: 'Internal server error',
                }
                msg = errors.get(code, 'status code %d' % code)
                raise LfsRemoteError(_('LFS server error for "%s": %s')
                                     % (filename, msg))
            else:
                raise LfsRemoteError(
                    _('LFS server error. Unsolicited response for oid %s')
                    % response['oid'])

    def _extractobjects(self, response, pointers, action):
        """extract objects from response of the batch API

        response: parsed JSON object returned by batch API
        return response['objects'] filtered by action
        raise if any object has an error
        """
        # Scan errors from objects - fail early
        objects = response.get('objects', [])
        self._checkforservererror(pointers, objects, action)

        # Filter objects with given action. Practically, this skips uploading
        # objects which exist in the server.
        filteredobjects = [o for o in objects if action in o.get('actions', [])]

        return filteredobjects

    def _basictransfer(self, obj, action, localstore):
        """Download or upload a single object using basic transfer protocol

        obj: dict, an object description returned by batch API
        action: string, one of ['upload', 'download']
        localstore: blobstore.local

        See https://github.com/git-lfs/git-lfs/blob/master/docs/api/\
        basic-transfers.md
        """
        oid = pycompat.bytestr(obj['oid'])

        href = pycompat.bytestr(obj['actions'][action].get('href'))
        headers = obj['actions'][action].get('header', {}).items()

        request = util.urlreq.request(href)
        if action == 'upload':
            # If uploading blobs, read data from local blobstore.
            if not localstore.verify(oid):
                raise error.Abort(_('detected corrupt lfs object: %s') % oid,
                                  hint=_('run hg verify'))
            request.data = filewithprogress(localstore.open(oid), None)
            request.get_method = lambda: 'PUT'
            request.add_header('Content-Type', 'application/octet-stream')

        for k, v in headers:
            request.add_header(k, v)

        response = b''
        try:
            with contextlib.closing(self.urlopener.open(request)) as req:
                ui = self.ui  # Shorten debug lines
                if self.ui.debugflag:
                    ui.debug('Status: %d\n' % req.status)
                    # lfs-test-server and hg serve return headers in different
                    # order
                    ui.debug('%s\n'
                             % '\n'.join(sorted(str(req.info()).splitlines())))

                if action == 'download':
                    # If downloading blobs, store downloaded data to local
                    # blobstore
                    localstore.download(oid, req)
                else:
                    while True:
                        data = req.read(1048576)
                        if not data:
                            break
                        response += data
                    if response:
                        ui.debug('lfs %s response: %s' % (action, response))
        except util.urlerr.httperror as ex:
            if self.ui.debugflag:
                self.ui.debug('%s: %s\n' % (oid, ex.read()))
            raise LfsRemoteError(_('LFS HTTP error: %s (oid=%s, action=%s)')
                                 % (ex, oid, action))
        except util.urlerr.urlerror as ex:
            hint = (_('attempted connection to %s')
                    % util.urllibcompat.getfullurl(request))
            raise LfsRemoteError(_('LFS error: %s') % _urlerrorreason(ex),
                                 hint=hint)

    def _batch(self, pointers, localstore, action):
        if action not in ['upload', 'download']:
            raise error.ProgrammingError('invalid Git-LFS action: %s' % action)

        response = self._batchrequest(pointers, action)
        objects = self._extractobjects(response, pointers, action)
        total = sum(x.get('size', 0) for x in objects)
        sizes = {}
        for obj in objects:
            sizes[obj.get('oid')] = obj.get('size', 0)
        topic = {'upload': _('lfs uploading'),
                 'download': _('lfs downloading')}[action]
        if len(objects) > 1:
            self.ui.note(_('lfs: need to transfer %d objects (%s)\n')
                         % (len(objects), util.bytecount(total)))

        def transfer(chunk):
            for obj in chunk:
                objsize = obj.get('size', 0)
                if self.ui.verbose:
                    if action == 'download':
                        msg = _('lfs: downloading %s (%s)\n')
                    elif action == 'upload':
                        msg = _('lfs: uploading %s (%s)\n')
                    self.ui.note(msg % (obj.get('oid'),
                                        util.bytecount(objsize)))
                retry = self.retry
                while True:
                    try:
                        self._basictransfer(obj, action, localstore)
                        yield 1, obj.get('oid')
                        break
                    except socket.error as ex:
                        if retry > 0:
                            self.ui.note(
                                _('lfs: failed: %r (remaining retry %d)\n')
                                % (ex, retry))
                            retry -= 1
                            continue
                        raise

        # Until https multiplexing gets sorted out
        if self.ui.configbool('experimental', 'lfs.worker-enable'):
            oids = worker.worker(self.ui, 0.1, transfer, (),
                                 sorted(objects, key=lambda o: o.get('oid')))
        else:
            oids = transfer(sorted(objects, key=lambda o: o.get('oid')))

        with self.ui.makeprogress(topic, total=total) as progress:
            progress.update(0)
            processed = 0
            blobs = 0
            for _one, oid in oids:
                processed += sizes[oid]
                blobs += 1
                progress.update(processed)
                self.ui.note(_('lfs: processed: %s\n') % oid)

        if blobs > 0:
            if action == 'upload':
                self.ui.status(_('lfs: uploaded %d files (%s)\n')
                               % (blobs, util.bytecount(processed)))
            elif action == 'download':
                self.ui.status(_('lfs: downloaded %d files (%s)\n')
                               % (blobs, util.bytecount(processed)))

    def __del__(self):
        # copied from mercurial/httppeer.py
        urlopener = getattr(self, 'urlopener', None)
        if urlopener:
            for h in urlopener.handlers:
                h.close()
                getattr(h, "close_all", lambda: None)()

class _dummyremote(object):
    """Dummy store storing blobs to temp directory."""

    def __init__(self, repo, url):
        fullpath = repo.vfs.join('lfs', url.path)
        self.vfs = lfsvfs(fullpath)

    def writebatch(self, pointers, fromstore):
        for p in _deduplicate(pointers):
            content = fromstore.read(p.oid(), verify=True)
            with self.vfs(p.oid(), 'wb', atomictemp=True) as fp:
                fp.write(content)

    def readbatch(self, pointers, tostore):
        for p in _deduplicate(pointers):
            with self.vfs(p.oid(), 'rb') as fp:
                tostore.download(p.oid(), fp)

class _nullremote(object):
    """Null store storing blobs to /dev/null."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore):
        pass

    def readbatch(self, pointers, tostore):
        pass

class _promptremote(object):
    """Prompt user to set lfs.url when accessed."""

    def __init__(self, repo, url):
        pass

    def writebatch(self, pointers, fromstore, ui=None):
        self._prompt()

    def readbatch(self, pointers, tostore, ui=None):
        self._prompt()

    def _prompt(self):
        raise error.Abort(_('lfs.url needs to be configured'))

_storemap = {
    'https': _gitlfsremote,
    'http': _gitlfsremote,
    'file': _dummyremote,
    'null': _nullremote,
    None: _promptremote,
}

def _deduplicate(pointers):
    """Remove any duplicate oids that exist in the list"""
    reduced = util.sortdict()
    for p in pointers:
        reduced[p.oid()] = p
    return reduced.values()

def _verify(oid, content):
-    realoid = hashlib.sha256(content).hexdigest()
+    realoid = node.hex(hashlib.sha256(content).digest())
    if realoid != oid:
        raise LfsCorruptionError(_('detected corrupt lfs object: %s') % oid,
                                 hint=_('run hg verify'))

def remote(repo, remote=None):
    """remotestore factory. return a store in _storemap depending on config

    If ``lfs.url`` is specified, use that remote endpoint. Otherwise, try to
    infer the endpoint, based on the remote repository using the same path
    adjustments as git. As an extension, 'http' is supported as well so that
    ``hg serve`` works out of the box.

    https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
    """
    lfsurl = repo.ui.config('lfs', 'url')
    url = util.url(lfsurl or '')
    if lfsurl is None:
        if remote:
            path = remote
        elif util.safehasattr(repo, '_subtoppath'):
            # The pull command sets this during the optional update phase,
            # which tells exactly where the pull originated, whether
            # 'paths.default' or explicit.
            path = repo._subtoppath
        else:
            # TODO: investigate 'paths.remote:lfsurl' style path customization,
            # and fall back to inferring from 'paths.remote' if unspecified.
            path = repo.ui.config('paths', 'default') or ''

        defaulturl = util.url(path)

        # TODO: support local paths as well.
        # TODO: consider the ssh -> https transformation that git applies
        if defaulturl.scheme in (b'http', b'https'):
            # Append '/' only when the path doesn't already end with one.
            # (The original check read '[:-1]', which tested everything but
            # the final byte.)
            if defaulturl.path and defaulturl.path[-1:] != b'/':
                defaulturl.path += b'/'
            defaulturl.path = (defaulturl.path or b'') + b'.git/info/lfs'

            url = util.url(bytes(defaulturl))
            repo.ui.note(_('lfs: assuming remote store: %s\n') % url)

    scheme = url.scheme
    if scheme not in _storemap:
        raise error.Abort(_('lfs: unknown url scheme: %s') % scheme)
    return _storemap[scheme](repo, url)

class LfsRemoteError(error.StorageError):
    pass

class LfsCorruptionError(error.Abort):
    """Raised when a corrupt blob is detected, aborting an operation

    It exists to allow specialized handling on the server side."""
@@ -1,539 +1,540 @@
from __future__ import absolute_import

import collections
import errno
import hashlib
import mmap
import os
import struct
import time

from mercurial.i18n import _
from mercurial import (
+    node as nodemod,
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)
from . import shallowutil

osutil = policy.importmod(r'osutil')

# The pack version supported by this implementation. This will need to be
# rev'd whenever the byte format changes. Ex: changing the fanout prefix,
# changing any of the int sizes, changing the delta algorithm, etc.
PACKVERSIONSIZE = 1
INDEXVERSIONSIZE = 2

FANOUTSTART = INDEXVERSIONSIZE

# Constant that indicates a fanout table entry hasn't been filled in. (This
# does not get serialized)
EMPTYFANOUT = -1

# The fanout prefix is the number of bytes that can be addressed by the fanout
# table. Example: a fanout prefix of 1 means we use the first byte of a hash to
# look in the fanout table (which will be 2^8 entries long).
SMALLFANOUTPREFIX = 1
LARGEFANOUTPREFIX = 2

# The number of entries in the index at which point we switch to a large
# fanout. It is chosen to balance the linear scan through a sparse fanout
# with the size of the bisect in the actual index.
# 2^16 // 8 was chosen because it trades off (1 step fanout scan + 5 step
# bisect) with (8 step fanout scan + 1 step bisect)
# 5 step bisect = log(2^16 / 8 / 255)  # fanout
# 8 step fanout scan = 2^16 / (2^16 / 8)  # fanout space divided by entries
SMALLFANOUTCUTOFF = 2**16 // 8  # integer division, so it stays an int on py3

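# Concretely: 2**16 // 8 = 8192 entries, so packs with more than 8192 nodes
# get the two-byte (65536-entry) fanout table.
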
# The amount of time to wait between checking for new packs. This prevents an
# exception when data is moved to a new pack after the process has already
# loaded the pack list.
REFRESHRATE = 0.1

if pycompat.isposix:
    # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
    # The 'e' flag will be ignored on older versions of glibc.
    PACKOPENMODE = 'rbe'
else:
    PACKOPENMODE = 'rb'

class _cachebackedpacks(object):
    def __init__(self, packs, cachesize):
        self._packs = set(packs)
        self._lrucache = util.lrucachedict(cachesize)
        self._lastpack = None

        # Avoid cold start of the cache by populating the most recent packs
        # in the cache.
        for i in reversed(range(min(cachesize, len(packs)))):
            self._movetofront(packs[i])

    def _movetofront(self, pack):
        # This effectively makes pack the first entry in the cache.
        self._lrucache[pack] = True

    def _registerlastpackusage(self):
        if self._lastpack is not None:
            self._movetofront(self._lastpack)
            self._lastpack = None

    def add(self, pack):
        self._registerlastpackusage()

        # This method will mostly be called when packs are not in the cache.
        # Therefore, add the pack to the cache.
        self._movetofront(pack)
        self._packs.add(pack)

    def __iter__(self):
        self._registerlastpackusage()

        # Cache iteration is based on LRU.
        for pack in self._lrucache:
            self._lastpack = pack
            yield pack

        cachedpacks = set(pack for pack in self._lrucache)
        # Yield for paths not in the cache.
        for pack in self._packs - cachedpacks:
            self._lastpack = pack
            yield pack

        # Data not found in any pack.
        self._lastpack = None

class basepackstore(object):
    # Default cache size limit for the pack files.
    DEFAULTCACHESIZE = 100

    def __init__(self, ui, path):
        self.ui = ui
        self.path = path

        # lastrefresh is 0 so we'll immediately check for new packs on the
        # first failure.
        self.lastrefresh = 0

        packs = []
        for filepath, __, __ in self._getavailablepackfilessorted():
            try:
                pack = self.getpack(filepath)
            except Exception as ex:
                # An exception may be thrown if the pack file is corrupted
                # somehow. Log a warning but keep going in this case, just
                # skipping this pack file.
                #
                # If this is an ENOENT error then don't even bother logging.
                # Someone could have removed the file since we retrieved the
                # list of paths.
                if getattr(ex, 'errno', None) != errno.ENOENT:
                    ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                continue
            packs.append(pack)

        self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)

    def _getavailablepackfiles(self):
        """For each pack file (an index/data file combo), yields:
          (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = collections.defaultdict(lambda: 0)
        mtimes = collections.defaultdict(lambda: [])
        try:
            for filename, type, stat in osutil.listdir(self.path, stat=True):
                id = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    id = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    id = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if id:
                    sizes[id] += stat.st_size  # Sum both files' sizes together
                    mtimes[id].append(stat.st_mtime)
                    if id in ids:
                        yield (os.path.join(self.path, id), max(mtimes[id]),
                               sizes[id])
                    else:
                        ids.add(id)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise

    def _getavailablepackfilessorted(self):
        """Like `_getavailablepackfiles`, but also sorts the files by mtime,
        yielding newest files first.

        This is desirable, since it is more likely newer packfiles have more
        desirable data.
        """
        files = []
        for path, mtime, size in self._getavailablepackfiles():
            files.append((mtime, size, path))
        files = sorted(files, reverse=True)
        for mtime, size, path in files:
            yield path, mtime, size

    def gettotalsizeandcount(self):
        """Returns the total disk size (in bytes) of all the pack files in
        this store, and the count of pack files.

        (This might be smaller than the total size of the ``self.path``
        directory, since this only considers fully-written pack files, and
        not temporary files or other detritus in the directory.)
        """
        totalsize = 0
        count = 0
        for __, __, size in self._getavailablepackfiles():
            totalsize += size
            count += 1
        return totalsize, count

    def getmetrics(self):
        """Returns metrics on the state of this store."""
        size, count = self.gettotalsizeandcount()
        return {
            'numpacks': count,
            'totalpacksize': size,
        }

    def getpack(self, path):
        raise NotImplementedError()

    def getmissing(self, keys):
        missing = keys
        for pack in self.packs:
            missing = pack.getmissing(missing)

            # Ensures better performance of the cache by keeping the most
            # recently accessed pack at the beginning in subsequent iterations.
            if not missing:
                return missing

        if missing:
            for pack in self.refresh():
                missing = pack.getmissing(missing)

        return missing

    def markledger(self, ledger, options=None):
        for pack in self.packs:
            pack.markledger(ledger)

    def markforrefresh(self):
        """Tells the store that there may be new pack files, so the next time
        it has a lookup miss it should check for new files."""
        self.lastrefresh = 0

    def refresh(self):
        """Checks for any new packs on disk, adds them to the main pack list,
        and returns a list of just the new packs."""
        now = time.time()

        # If we experience a lot of misses (like in the case of getmissing()
        # on new objects), let's only actually check disk for new stuff every
        # once in a while. Generally this code path should only ever matter
        # when a repack is going on in the background, and it should be pretty
        # rare for that to happen twice in quick succession.
        newpacks = []
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            previous = set(p.path for p in self.packs)
            for filepath, __, __ in self._getavailablepackfilessorted():
                if filepath not in previous:
                    newpack = self.getpack(filepath)
                    newpacks.append(newpack)
                    self.packs.add(newpack)

        return newpacks

class versionmixin(object):
    # Mix-in for classes with multiple supported versions
    VERSION = None
    SUPPORTED_VERSIONS = [2]

    def _checkversion(self, version):
        if version in self.SUPPORTED_VERSIONS:
            if self.VERSION is None:
                # only affect this instance
                self.VERSION = version
            elif self.VERSION != version:
                raise RuntimeError('inconsistent version: %s' % version)
        else:
            raise RuntimeError('unsupported version: %s' % version)

class basepack(versionmixin):
    # The maximum amount we should read via mmap before remmaping so the old
    # pages can be released (100MB)
    MAXPAGEDIN = 100 * 1024**2

    SUPPORTED_VERSIONS = [2]

    def __init__(self, path):
        self.path = path
        self.packpath = path + self.PACKSUFFIX
        self.indexpath = path + self.INDEXSUFFIX

        self.indexsize = os.stat(self.indexpath).st_size
        self.datasize = os.stat(self.packpath).st_size

        self._index = None
        self._data = None
        self.freememory()  # initialize the mmap

        version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
        self._checkversion(version)

        version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
        self._checkversion(version)

        if 0b10000000 & config:
            self.params = indexparams(LARGEFANOUTPREFIX, version)
        else:
            self.params = indexparams(SMALLFANOUTPREFIX, version)

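    # A sketch of the on-disk layout implied by the reads above (the index
    # entry format itself is defined by subclasses via INDEXENTRYLENGTH, and
    # the file suffixes via PACKSUFFIX/INDEXSUFFIX):
    #   pack file:  [1-byte version][entries...]
    #   index file: [1-byte version][1-byte config][fanout table]
    #               [8-byte entry count][index entries]
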
    @util.propertycache
    def _fanouttable(self):
        params = self.params
        rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
        fanouttable = []
        for i in pycompat.xrange(0, params.fanoutcount):
            loc = i * 4
            fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
            fanouttable.append(fanoutentry)
        return fanouttable

    @util.propertycache
    def _indexend(self):
        nodecount = struct.unpack_from('!Q', self._index,
                                       self.params.indexstart - 8)[0]
        return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH

    def freememory(self):
        """Unmap and remap the memory to free it up after known expensive
        operations. Return True if self._data and self._index were reloaded.
        """
        if self._index:
            if self._pagedin < self.MAXPAGEDIN:
                return False

            self._index.close()
            self._data.close()

        # TODO: use an opener/vfs to access these paths
        with open(self.indexpath, PACKOPENMODE) as indexfp:
            # memory-map the file, size 0 means whole file
            self._index = mmap.mmap(indexfp.fileno(), 0,
                                    access=mmap.ACCESS_READ)
        with open(self.packpath, PACKOPENMODE) as datafp:
            self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

        self._pagedin = 0
        return True

    def getmissing(self, keys):
        raise NotImplementedError()

    def markledger(self, ledger, options=None):
        raise NotImplementedError()

    def cleanup(self, ledger):
        raise NotImplementedError()

    def __iter__(self):
        raise NotImplementedError()

    def iterentries(self):
        raise NotImplementedError()

class mutablebasepack(versionmixin):

    def __init__(self, ui, packdir, version=2):
        self._checkversion(version)
        # TODO(augie): make this configurable
        self._compressor = 'GZ'
        opener = vfsmod.vfs(packdir)
        opener.createmode = 0o444
        self.opener = opener

        self.entries = {}

        shallowutil.mkstickygroupdir(ui, packdir)
        self.packfp, self.packpath = opener.mkstemp(
            suffix=self.PACKSUFFIX + '-tmp')
        self.idxfp, self.idxpath = opener.mkstemp(
            suffix=self.INDEXSUFFIX + '-tmp')
        self.packfp = os.fdopen(self.packfp, r'wb+')
        self.idxfp = os.fdopen(self.idxfp, r'wb+')
        self.sha = hashlib.sha1()
        self._closed = False

        # The opener provides no way of doing permission fixup on files
        # created via mkstemp, so we must fix it ourselves. We can probably
        # fix this upstream in vfs.mkstemp so we don't need to use the
        # private method.
        opener._fixfilemode(opener.join(self.packpath))
        opener._fixfilemode(opener.join(self.idxpath))

        # Write header
        # TODO: make it extensible (ex: allow specifying compression algorithm,
        # a flexible key/value header, delta algorithm, fanout size, etc)
        versionbuf = struct.pack('!B', self.VERSION)  # unsigned 1 byte int
        self.writeraw(versionbuf)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            self.close()
        else:
            self.abort()

    def abort(self):
        # Unclean exit
        self._cleantemppacks()

    def writeraw(self, data):
        self.packfp.write(data)
        self.sha.update(data)

    def close(self, ledger=None):
        if self._closed:
            return

        try:
-            sha = self.sha.hexdigest()
+            sha = nodemod.hex(self.sha.digest())
            self.packfp.close()
            self.writeindex()

            if len(self.entries) == 0:
                # Empty pack
                self._cleantemppacks()
                self._closed = True
                return None

            self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
            try:
                self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
            except Exception as ex:
                try:
                    self.opener.unlink(sha + self.PACKSUFFIX)
                except Exception:
                    pass
                # Throw exception 'ex' explicitly since a normal 'raise' would
                # potentially throw an exception from the unlink cleanup.
                raise ex
        except Exception:
            # Clean up temp packs in all exception cases
            self._cleantemppacks()
            raise

        self._closed = True
        result = self.opener.join(sha)
        if ledger:
            ledger.addcreated(result)
        return result

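    # Note the renames above: finished packs are content-addressed by the
    # sha1 of everything written through writeraw(), and (as with _verify()
    # in the first file) nodemod.hex(...digest()) replaces hexdigest() so
    # the resulting name stays bytes on Python 3.
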
    def _cleantemppacks(self):
        try:
            self.opener.unlink(self.packpath)
        except Exception:
            pass
        try:
            self.opener.unlink(self.idxpath)
        except Exception:
            pass

    def writeindex(self):
        rawindex = ''

        largefanout = len(self.entries) > SMALLFANOUTCUTOFF
        if largefanout:
            params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
        else:
            params = indexparams(SMALLFANOUTPREFIX, self.VERSION)

        fanouttable = [EMPTYFANOUT] * params.fanoutcount

        # Precompute the location of each entry
        locations = {}
        count = 0
        for node in sorted(self.entries):
            location = count * self.INDEXENTRYLENGTH
            locations[node] = location
            count += 1

            # Must use [0] on the unpack result since it's always a tuple.
            fanoutkey = struct.unpack(params.fanoutstruct,
                                      node[:params.fanoutprefix])[0]
            if fanouttable[fanoutkey] == EMPTYFANOUT:
                fanouttable[fanoutkey] = location

        rawfanouttable = ''
        last = 0
        for offset in fanouttable:
            offset = offset if offset != EMPTYFANOUT else last
            last = offset
            rawfanouttable += struct.pack('!I', offset)

        rawentrieslength = struct.pack('!Q', len(self.entries))

        # The index offset is its location in the file, i.e. right after the
        # 2-byte header and the fanout table.
        rawindex = self.createindex(locations, 2 + len(rawfanouttable))

        self._writeheader(params)
        self.idxfp.write(rawfanouttable)
        self.idxfp.write(rawentrieslength)
        self.idxfp.write(rawindex)
        self.idxfp.close()

    def createindex(self, nodelocations, indexoffset):
        raise NotImplementedError()

    def _writeheader(self, indexparams):
        # Index header
        #   <version: 1 byte>
        #   <large fanout: 1 bit>  # 1 means 2^16, 0 means 2^8
        #   <unused: 7 bit>  # future use (compression, delta format, etc)
        config = 0
        if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
            config = 0b10000000
        self.idxfp.write(struct.pack('!BB', self.VERSION, config))

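    # For instance, version 2 with a large fanout writes
    # struct.pack('!BB', 2, 0b10000000) == b'\x02\x80' as the index header.
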
class indexparams(object):
    __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
                 r'fanoutsize', r'indexstart')

    def __init__(self, prefixsize, version):
        self.fanoutprefix = prefixsize

        # The struct pack format for fanout table location (i.e. the format
        # that converts the node prefix into an integer location in the
        # fanout table).
        if prefixsize == SMALLFANOUTPREFIX:
            self.fanoutstruct = '!B'
        elif prefixsize == LARGEFANOUTPREFIX:
            self.fanoutstruct = '!H'
        else:
            raise ValueError("invalid fanout prefix size: %s" % prefixsize)

        # The number of fanout table entries
        self.fanoutcount = 2**(prefixsize * 8)

        # The total bytes used by the fanout table
        self.fanoutsize = self.fanoutcount * 4

        self.indexstart = FANOUTSTART + self.fanoutsize
        # Skip the index length
        self.indexstart += 8
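
# Worked example (the values follow directly from the fields above): for
# prefixsize=1, fanoutstruct='!B', fanoutcount=256, fanoutsize=1024 bytes,
# and indexstart = 2 + 1024 + 8 = 1034. For prefixsize=2 the fanout grows to
# 65536 entries (256KB) and indexstart = 2 + 262144 + 8 = 262154.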